commit ece8a8196b5e2646dda29128db525718ced12454 Author: ModelHub XC Date: Tue Jun 9 11:19:20 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: rbelanec/train_sst2_42_1779194533 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..f1b51d2 --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +library_name: transformers +license: llama3.2 +base_model: meta-llama/Llama-3.2-1B-Instruct +tags: +- peft-factory +- full +- llama-factory +- generated_from_trainer +model-index: +- name: train_sst2_42_1779194533 + results: [] +--- + + + +# train_sst2_42_1779194533 + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the sst2 dataset. +It achieves the following results on the evaluation set: +- Loss: 0.0970 +- Num Input Tokens Seen: 18647328 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 5 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen | +|:-------------:|:------:|:-----:|:---------------:|:-----------------:| +| 0.4074 | 0.2501 | 1895 | 0.1552 | 930944 | +| 0.3196 | 0.5002 | 3790 | 0.1577 | 1864128 | +| 0.0028 | 0.7503 | 5685 | 0.0970 | 2790656 | +| 0.0006 | 1.0004 | 7580 | 0.1143 | 3726464 | +| 0.1179 | 1.2505 | 9475 | 0.1166 | 4658240 | +| 0.1073 | 1.5006 | 11370 | 0.1257 | 5591680 | +| 0.342 | 1.7507 | 13265 | 0.1152 | 6528448 | +| 0.0004 | 2.0008 | 15160 | 0.1182 | 7463024 | +| 0.0556 | 2.2509 | 17055 | 0.1500 | 8395632 | +| 0.0962 | 2.5010 | 18950 | 0.1142 | 9326256 | +| 0.0429 | 2.7511 | 20845 | 0.1603 | 10259504 | +| 0.0352 | 3.0012 | 22740 | 0.1483 | 11196096 | +| 0.0352 | 3.2513 | 24635 | 0.1809 | 12128448 | +| 0.0 | 3.5014 | 26530 | 0.1809 | 13069824 | +| 0.0243 | 3.7515 | 28425 | 0.2036 | 13996672 | +| 0.0002 | 4.0016 | 30320 | 0.1816 | 14924944 | +| 0.0087 | 4.2517 | 32215 | 0.2473 | 15859920 | +| 0.0 | 4.5018 | 34110 | 0.2764 | 16790288 | +| 0.0 | 4.7519 | 36005 | 0.2836 | 17721744 | + + +### Framework versions + +- Transformers 4.51.3 +- Pytorch 2.10.0+cu128 +- Datasets 4.0.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..0b48f5a --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 5.0, + "eval_loss": 0.09698151051998138, + "eval_runtime": 8.0011, + "eval_samples_per_second": 841.76, + "eval_steps_per_second": 105.236, + "num_input_tokens_seen": 18647328, + "total_flos": 1.0887944845433242e+17, + "train_loss": 0.05778941776209685, + "train_runtime": 3377.0005, + "train_samples_per_second": 89.745, + "train_steps_per_second": 11.219 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5a2b93f --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..1b9c379 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "eval_loss": 0.09698151051998138, + "eval_runtime": 8.0011, + "eval_samples_per_second": 841.76, + "eval_steps_per_second": 105.236, + "num_input_tokens_seen": 18647328 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2b8ae57 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..b56fc04 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4bbd3b9a180d790332e718e4fe84f5372702d00714f50a6950ba183895fb7f +size 4943274328 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14daf45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddc3ce0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2069 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.yaml b/train.yaml new file mode 100644 index 0000000..7399eb5 --- /dev/null +++ b/train.yaml @@ -0,0 +1,55 @@ +seed: 42 + +### model +model_name_or_path: meta-llama/Llama-3.2-1B-Instruct +trust_remote_code: true +flash_attn: auto +use_cache: false + +### method +stage: sft +do_train: true +finetuning_type: full + +### dataset +dataset: sst2 +template: llama3 +cutoff_len: 2048 +overwrite_cache: true +preprocessing_num_workers: 4 +dataloader_num_workers: 4 +packing: false + +### output +output_dir: saves_bts_preliminary/base/llama-3.2-1b-instruct/train_sst2_42_1779194533 +logging_steps: 5 +save_steps: 0.05 +overwrite_output_dir: true +save_only_model: false +plot_loss: true +include_num_input_tokens_seen: true +push_to_hub: true +push_to_hub_organization: rbelanec +load_best_model_at_end: true +save_total_limit: 1 + +### train +per_device_train_batch_size: 8 +learning_rate: 2.0e-6 +num_train_epochs: 5 +weight_decay: 1.0e-2 +lr_scheduler_type: cosine +bf16: true +ddp_timeout: 180000000 +resume_from_checkpoint: null +warmup_ratio: 0.1 +optim: adamw_torch +report_to: +- wandb +run_name: base_llama-3.2-1b-instruct_train_sst2_42_1779194533 + +### eval +per_device_eval_batch_size: 8 +eval_strategy: steps +eval_steps: 0.05 +val_size: 0.1 \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..8862a00 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 5.0, + "num_input_tokens_seen": 18647328, + "total_flos": 1.0887944845433242e+17, + "train_loss": 0.05778941776209685, + "train_runtime": 3377.0005, + "train_samples_per_second": 89.745, + "train_steps_per_second": 11.219 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..1cc1d3b --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,7597 @@ +{"current_steps": 5, "total_steps": 37885, "loss": 1.4356, "lr": 2.111375032990235e-09, "epoch": 0.0006598917777484492, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "3:34:02", "throughput": 1321.39, "total_tokens": 2240} +{"current_steps": 10, "total_steps": 37885, "loss": 1.5327, "lr": 4.7505938242280285e-09, "epoch": 0.0013197835554968984, "percentage": 0.03, "elapsed_time": "0:00:02", "remaining_time": "2:09:06", "throughput": 2284.35, "total_tokens": 4672} +{"current_steps": 15, "total_steps": 37885, "loss": 1.4513, "lr": 7.389812615465822e-09, "epoch": 0.0019796753332453477, "percentage": 0.04, "elapsed_time": "0:00:02", "remaining_time": "1:39:59", "throughput": 2962.75, "total_tokens": 7040} +{"current_steps": 20, "total_steps": 37885, "loss": 1.4726, "lr": 1.0029031406703616e-08, "epoch": 0.002639567110993797, "percentage": 0.05, "elapsed_time": "0:00:02", "remaining_time": "1:25:21", "throughput": 3548.75, "total_tokens": 9600} +{"current_steps": 25, "total_steps": 37885, "loss": 1.4022, "lr": 1.2668250197941409e-08, "epoch": 0.0032994588887422464, "percentage": 0.07, "elapsed_time": "0:00:03", "remaining_time": "1:16:31", "throughput": 4010.46, "total_tokens": 12160} +{"current_steps": 30, "total_steps": 37885, "loss": 1.5128, "lr": 1.5307468989179204e-08, "epoch": 0.0039593506664906955, "percentage": 0.08, "elapsed_time": "0:00:03", "remaining_time": "1:10:39", "throughput": 4324.5, "total_tokens": 14528} +{"current_steps": 35, "total_steps": 37885, "loss": 1.3482, "lr": 1.7946687780416997e-08, "epoch": 0.004619242444239145, "percentage": 0.09, "elapsed_time": "0:00:03", "remaining_time": "1:06:27", "throughput": 4547.47, "total_tokens": 16768} +{"current_steps": 40, "total_steps": 37885, "loss": 1.4398, "lr": 2.058590657165479e-08, "epoch": 0.005279134221987594, "percentage": 0.11, "elapsed_time": "0:00:04", "remaining_time": "1:03:21", "throughput": 4794.28, "total_tokens": 19264} +{"current_steps": 45, "total_steps": 37885, "loss": 1.4043, "lr": 2.3225125362892583e-08, "epoch": 0.005939025999736044, "percentage": 0.12, "elapsed_time": "0:00:04", "remaining_time": "1:01:04", "throughput": 4964.35, "total_tokens": 21632} +{"current_steps": 50, "total_steps": 37885, "loss": 1.3473, "lr": 2.5864344154130376e-08, "epoch": 0.006598917777484493, "percentage": 0.13, "elapsed_time": "0:00:04", "remaining_time": "0:59:03", "throughput": 5124.89, "total_tokens": 24000} +{"current_steps": 55, "total_steps": 37885, "loss": 1.427, "lr": 2.850356294536817e-08, "epoch": 0.007258809555232942, "percentage": 0.15, "elapsed_time": "0:00:05", "remaining_time": "0:57:28", "throughput": 5284.83, "total_tokens": 26496} +{"current_steps": 60, "total_steps": 37885, "loss": 1.4604, "lr": 3.1142781736605966e-08, "epoch": 0.007918701332981391, "percentage": 0.16, "elapsed_time": "0:00:05", "remaining_time": "0:56:11", "throughput": 5445.53, "total_tokens": 29120} +{"current_steps": 65, "total_steps": 37885, "loss": 1.2511, "lr": 3.378200052784376e-08, "epoch": 0.008578593110729841, "percentage": 0.17, "elapsed_time": "0:00:05", "remaining_time": "0:55:05", "throughput": 5588.02, "total_tokens": 31744} +{"current_steps": 70, "total_steps": 37885, "loss": 1.2233, "lr": 3.6421219319081546e-08, "epoch": 0.00923848488847829, "percentage": 0.18, "elapsed_time": "0:00:06", "remaining_time": "0:54:06", "throughput": 5686.91, "total_tokens": 34176} +{"current_steps": 75, "total_steps": 37885, "loss": 1.2987, "lr": 3.9060438110319346e-08, "epoch": 0.009898376666226739, "percentage": 0.2, "elapsed_time": "0:00:06", "remaining_time": "0:53:18", "throughput": 5810.7, "total_tokens": 36864} +{"current_steps": 80, "total_steps": 37885, "loss": 1.1757, "lr": 4.169965690155713e-08, "epoch": 0.010558268443975187, "percentage": 0.21, "elapsed_time": "0:00:06", "remaining_time": "0:52:34", "throughput": 5906.38, "total_tokens": 39424} +{"current_steps": 85, "total_steps": 37885, "loss": 1.1551, "lr": 4.433887569279493e-08, "epoch": 0.011218160221723637, "percentage": 0.22, "elapsed_time": "0:00:07", "remaining_time": "0:51:57", "throughput": 6007.46, "total_tokens": 42112} +{"current_steps": 90, "total_steps": 37885, "loss": 1.0816, "lr": 4.6978094484032725e-08, "epoch": 0.011878051999472087, "percentage": 0.24, "elapsed_time": "0:00:07", "remaining_time": "0:51:21", "throughput": 6069.53, "total_tokens": 44544} +{"current_steps": 95, "total_steps": 37885, "loss": 0.8812, "lr": 4.961731327527052e-08, "epoch": 0.012537943777220536, "percentage": 0.25, "elapsed_time": "0:00:07", "remaining_time": "0:50:55", "throughput": 6132.86, "total_tokens": 47104} +{"current_steps": 100, "total_steps": 37885, "loss": 0.8313, "lr": 5.225653206650831e-08, "epoch": 0.013197835554968985, "percentage": 0.26, "elapsed_time": "0:00:08", "remaining_time": "0:50:27", "throughput": 6198.38, "total_tokens": 49664} +{"current_steps": 105, "total_steps": 37885, "loss": 0.7819, "lr": 5.4895750857746105e-08, "epoch": 0.013857727332717434, "percentage": 0.28, "elapsed_time": "0:00:08", "remaining_time": "0:50:01", "throughput": 6275.64, "total_tokens": 52352} +{"current_steps": 110, "total_steps": 37885, "loss": 0.746, "lr": 5.75349696489839e-08, "epoch": 0.014517619110465884, "percentage": 0.29, "elapsed_time": "0:00:08", "remaining_time": "0:49:36", "throughput": 6313.1, "total_tokens": 54720} +{"current_steps": 115, "total_steps": 37885, "loss": 0.7562, "lr": 6.01741884402217e-08, "epoch": 0.015177510888214334, "percentage": 0.3, "elapsed_time": "0:00:08", "remaining_time": "0:49:14", "throughput": 6353.76, "total_tokens": 57152} +{"current_steps": 120, "total_steps": 37885, "loss": 0.6604, "lr": 6.281340723145948e-08, "epoch": 0.015837402665962782, "percentage": 0.32, "elapsed_time": "0:00:09", "remaining_time": "0:48:55", "throughput": 6408.76, "total_tokens": 59776} +{"current_steps": 125, "total_steps": 37885, "loss": 0.4326, "lr": 6.545262602269728e-08, "epoch": 0.01649729444371123, "percentage": 0.33, "elapsed_time": "0:00:09", "remaining_time": "0:48:38", "throughput": 6464.85, "total_tokens": 62464} +{"current_steps": 130, "total_steps": 37885, "loss": 0.3566, "lr": 6.809184481393507e-08, "epoch": 0.017157186221459682, "percentage": 0.34, "elapsed_time": "0:00:09", "remaining_time": "0:48:22", "throughput": 6511.73, "total_tokens": 65088} +{"current_steps": 135, "total_steps": 37885, "loss": 0.3399, "lr": 7.073106360517287e-08, "epoch": 0.01781707799920813, "percentage": 0.36, "elapsed_time": "0:00:10", "remaining_time": "0:48:07", "throughput": 6564.21, "total_tokens": 67776} +{"current_steps": 140, "total_steps": 37885, "loss": 0.3479, "lr": 7.337028239641066e-08, "epoch": 0.01847696977695658, "percentage": 0.37, "elapsed_time": "0:00:10", "remaining_time": "0:47:53", "throughput": 6606.29, "total_tokens": 70400} +{"current_steps": 145, "total_steps": 37885, "loss": 0.3225, "lr": 7.600950118764846e-08, "epoch": 0.01913686155470503, "percentage": 0.38, "elapsed_time": "0:00:10", "remaining_time": "0:47:41", "throughput": 6612.95, "total_tokens": 72704} +{"current_steps": 150, "total_steps": 37885, "loss": 0.3282, "lr": 7.864871997888626e-08, "epoch": 0.019796753332453478, "percentage": 0.4, "elapsed_time": "0:00:11", "remaining_time": "0:47:27", "throughput": 6637.39, "total_tokens": 75136} +{"current_steps": 155, "total_steps": 37885, "loss": 0.3013, "lr": 8.128793877012403e-08, "epoch": 0.020456645110201926, "percentage": 0.41, "elapsed_time": "0:00:11", "remaining_time": "0:47:15", "throughput": 6664.25, "total_tokens": 77632} +{"current_steps": 160, "total_steps": 37885, "loss": 0.3084, "lr": 8.392715756136183e-08, "epoch": 0.021116536887950375, "percentage": 0.42, "elapsed_time": "0:00:11", "remaining_time": "0:47:06", "throughput": 6700.13, "total_tokens": 80320} +{"current_steps": 165, "total_steps": 37885, "loss": 0.3253, "lr": 8.656637635259963e-08, "epoch": 0.021776428665698826, "percentage": 0.44, "elapsed_time": "0:00:12", "remaining_time": "0:46:55", "throughput": 6718.42, "total_tokens": 82752} +{"current_steps": 170, "total_steps": 37885, "loss": 0.2936, "lr": 8.920559514383743e-08, "epoch": 0.022436320443447275, "percentage": 0.45, "elapsed_time": "0:00:12", "remaining_time": "0:46:45", "throughput": 6740.9, "total_tokens": 85248} +{"current_steps": 175, "total_steps": 37885, "loss": 0.3018, "lr": 9.184481393507522e-08, "epoch": 0.023096212221195723, "percentage": 0.46, "elapsed_time": "0:00:12", "remaining_time": "0:46:37", "throughput": 6769.47, "total_tokens": 87872} +{"current_steps": 180, "total_steps": 37885, "loss": 0.2907, "lr": 9.4484032726313e-08, "epoch": 0.023756103998944175, "percentage": 0.48, "elapsed_time": "0:00:13", "remaining_time": "0:46:27", "throughput": 6789.68, "total_tokens": 90368} +{"current_steps": 185, "total_steps": 37885, "loss": 0.2362, "lr": 9.71232515175508e-08, "epoch": 0.024415995776692623, "percentage": 0.49, "elapsed_time": "0:00:13", "remaining_time": "0:46:19", "throughput": 6813.68, "total_tokens": 92928} +{"current_steps": 190, "total_steps": 37885, "loss": 0.2169, "lr": 9.976247030878859e-08, "epoch": 0.02507588755444107, "percentage": 0.5, "elapsed_time": "0:00:13", "remaining_time": "0:46:10", "throughput": 6823.31, "total_tokens": 95296} +{"current_steps": 195, "total_steps": 37885, "loss": 0.204, "lr": 1.0240168910002639e-07, "epoch": 0.02573577933218952, "percentage": 0.51, "elapsed_time": "0:00:14", "remaining_time": "0:46:04", "throughput": 6851.79, "total_tokens": 97984} +{"current_steps": 200, "total_steps": 37885, "loss": 0.2406, "lr": 1.0504090789126419e-07, "epoch": 0.02639567110993797, "percentage": 0.53, "elapsed_time": "0:00:14", "remaining_time": "0:45:55", "throughput": 6861.54, "total_tokens": 100352} +{"current_steps": 205, "total_steps": 37885, "loss": 0.2504, "lr": 1.0768012668250196e-07, "epoch": 0.02705556288768642, "percentage": 0.54, "elapsed_time": "0:00:14", "remaining_time": "0:45:48", "throughput": 6853.24, "total_tokens": 102464} +{"current_steps": 210, "total_steps": 37885, "loss": 0.1825, "lr": 1.1031934547373976e-07, "epoch": 0.027715454665434867, "percentage": 0.55, "elapsed_time": "0:00:15", "remaining_time": "0:45:41", "throughput": 6876.48, "total_tokens": 105088} +{"current_steps": 215, "total_steps": 37885, "loss": 0.203, "lr": 1.1295856426497756e-07, "epoch": 0.02837534644318332, "percentage": 0.57, "elapsed_time": "0:00:15", "remaining_time": "0:45:35", "throughput": 6894.69, "total_tokens": 107648} +{"current_steps": 220, "total_steps": 37885, "loss": 0.1882, "lr": 1.1559778305621536e-07, "epoch": 0.029035238220931767, "percentage": 0.58, "elapsed_time": "0:00:15", "remaining_time": "0:45:29", "throughput": 6909.27, "total_tokens": 110144} +{"current_steps": 225, "total_steps": 37885, "loss": 0.202, "lr": 1.1823700184745315e-07, "epoch": 0.029695129998680216, "percentage": 0.59, "elapsed_time": "0:00:16", "remaining_time": "0:45:23", "throughput": 6919.82, "total_tokens": 112576} +{"current_steps": 230, "total_steps": 37885, "loss": 0.2255, "lr": 1.2087622063869096e-07, "epoch": 0.030355021776428667, "percentage": 0.61, "elapsed_time": "0:00:16", "remaining_time": "0:45:18", "throughput": 6942.84, "total_tokens": 115264} +{"current_steps": 235, "total_steps": 37885, "loss": 0.1494, "lr": 1.2351543942992873e-07, "epoch": 0.031014913554177116, "percentage": 0.62, "elapsed_time": "0:00:16", "remaining_time": "0:45:12", "throughput": 6961.75, "total_tokens": 117888} +{"current_steps": 240, "total_steps": 37885, "loss": 0.0743, "lr": 1.2615465822116653e-07, "epoch": 0.031674805331925564, "percentage": 0.63, "elapsed_time": "0:00:17", "remaining_time": "0:45:07", "throughput": 6971.05, "total_tokens": 120320} +{"current_steps": 245, "total_steps": 37885, "loss": 0.1968, "lr": 1.2879387701240433e-07, "epoch": 0.032334697109674015, "percentage": 0.65, "elapsed_time": "0:00:17", "remaining_time": "0:45:01", "throughput": 6976.82, "total_tokens": 122688} +{"current_steps": 250, "total_steps": 37885, "loss": 0.1802, "lr": 1.314330958036421e-07, "epoch": 0.03299458888742246, "percentage": 0.66, "elapsed_time": "0:00:17", "remaining_time": "0:44:56", "throughput": 6991.96, "total_tokens": 125248} +{"current_steps": 255, "total_steps": 37885, "loss": 0.149, "lr": 1.340723145948799e-07, "epoch": 0.03365448066517091, "percentage": 0.67, "elapsed_time": "0:00:18", "remaining_time": "0:44:51", "throughput": 6999.4, "total_tokens": 127680} +{"current_steps": 260, "total_steps": 37885, "loss": 0.0907, "lr": 1.367115333861177e-07, "epoch": 0.034314372442919364, "percentage": 0.69, "elapsed_time": "0:00:18", "remaining_time": "0:44:48", "throughput": 7024.26, "total_tokens": 130496} +{"current_steps": 265, "total_steps": 37885, "loss": 0.1709, "lr": 1.393507521773555e-07, "epoch": 0.03497426422066781, "percentage": 0.7, "elapsed_time": "0:00:18", "remaining_time": "0:44:43", "throughput": 7034.92, "total_tokens": 132992} +{"current_steps": 270, "total_steps": 37885, "loss": 0.1453, "lr": 1.419899709685933e-07, "epoch": 0.03563415599841626, "percentage": 0.71, "elapsed_time": "0:00:19", "remaining_time": "0:44:39", "throughput": 7022.26, "total_tokens": 135040} +{"current_steps": 275, "total_steps": 37885, "loss": 0.1749, "lr": 1.4462918975983108e-07, "epoch": 0.03629404777616471, "percentage": 0.73, "elapsed_time": "0:00:19", "remaining_time": "0:44:34", "throughput": 7035.21, "total_tokens": 137600} +{"current_steps": 280, "total_steps": 37885, "loss": 0.1575, "lr": 1.4726840855106888e-07, "epoch": 0.03695393955391316, "percentage": 0.74, "elapsed_time": "0:00:19", "remaining_time": "0:44:30", "throughput": 7034.77, "total_tokens": 139904} +{"current_steps": 285, "total_steps": 37885, "loss": 0.1957, "lr": 1.4990762734230665e-07, "epoch": 0.03761383133166161, "percentage": 0.75, "elapsed_time": "0:00:20", "remaining_time": "0:44:26", "throughput": 7026.07, "total_tokens": 142016} +{"current_steps": 290, "total_steps": 37885, "loss": 0.0195, "lr": 1.5254684613354445e-07, "epoch": 0.03827372310941006, "percentage": 0.77, "elapsed_time": "0:00:20", "remaining_time": "0:44:22", "throughput": 7038.13, "total_tokens": 144576} +{"current_steps": 295, "total_steps": 37885, "loss": 0.086, "lr": 1.5518606492478225e-07, "epoch": 0.038933614887158505, "percentage": 0.78, "elapsed_time": "0:00:20", "remaining_time": "0:44:18", "throughput": 7039.0, "total_tokens": 146880} +{"current_steps": 300, "total_steps": 37885, "loss": 0.2891, "lr": 1.5782528371602005e-07, "epoch": 0.039593506664906956, "percentage": 0.79, "elapsed_time": "0:00:21", "remaining_time": "0:44:15", "throughput": 7039.47, "total_tokens": 149184} +{"current_steps": 305, "total_steps": 37885, "loss": 0.1615, "lr": 1.6046450250725785e-07, "epoch": 0.0402533984426554, "percentage": 0.81, "elapsed_time": "0:00:21", "remaining_time": "0:44:11", "throughput": 7031.68, "total_tokens": 151296} +{"current_steps": 310, "total_steps": 37885, "loss": 0.2055, "lr": 1.6310372129849565e-07, "epoch": 0.04091329022040385, "percentage": 0.82, "elapsed_time": "0:00:21", "remaining_time": "0:44:07", "throughput": 7034.42, "total_tokens": 153664} +{"current_steps": 315, "total_steps": 37885, "loss": 0.2078, "lr": 1.6574294008973345e-07, "epoch": 0.041573181998152305, "percentage": 0.83, "elapsed_time": "0:00:22", "remaining_time": "0:44:04", "throughput": 7037.54, "total_tokens": 156032} +{"current_steps": 320, "total_steps": 37885, "loss": 0.1392, "lr": 1.6838215888097122e-07, "epoch": 0.04223307377590075, "percentage": 0.84, "elapsed_time": "0:00:22", "remaining_time": "0:44:01", "throughput": 7044.45, "total_tokens": 158528} +{"current_steps": 325, "total_steps": 37885, "loss": 0.1113, "lr": 1.71021377672209e-07, "epoch": 0.0428929655536492, "percentage": 0.86, "elapsed_time": "0:00:22", "remaining_time": "0:43:59", "throughput": 7037.47, "total_tokens": 160704} +{"current_steps": 330, "total_steps": 37885, "loss": 0.178, "lr": 1.736605964634468e-07, "epoch": 0.04355285733139765, "percentage": 0.87, "elapsed_time": "0:00:23", "remaining_time": "0:43:56", "throughput": 7039.06, "total_tokens": 163072} +{"current_steps": 335, "total_steps": 37885, "loss": 0.1961, "lr": 1.762998152546846e-07, "epoch": 0.0442127491091461, "percentage": 0.88, "elapsed_time": "0:00:23", "remaining_time": "0:43:54", "throughput": 7044.0, "total_tokens": 165568} +{"current_steps": 340, "total_steps": 37885, "loss": 0.2253, "lr": 1.789390340459224e-07, "epoch": 0.04487264088689455, "percentage": 0.9, "elapsed_time": "0:00:23", "remaining_time": "0:43:52", "throughput": 7044.54, "total_tokens": 167936} +{"current_steps": 345, "total_steps": 37885, "loss": 0.174, "lr": 1.815782528371602e-07, "epoch": 0.045532532664643, "percentage": 0.91, "elapsed_time": "0:00:24", "remaining_time": "0:43:50", "throughput": 7040.38, "total_tokens": 170176} +{"current_steps": 350, "total_steps": 37885, "loss": 0.1717, "lr": 1.84217471628398e-07, "epoch": 0.046192424442391446, "percentage": 0.92, "elapsed_time": "0:00:24", "remaining_time": "0:43:47", "throughput": 7033.66, "total_tokens": 172352} +{"current_steps": 355, "total_steps": 37885, "loss": 0.1506, "lr": 1.8685669041963577e-07, "epoch": 0.0468523162201399, "percentage": 0.94, "elapsed_time": "0:00:24", "remaining_time": "0:43:46", "throughput": 7050.16, "total_tokens": 175168} +{"current_steps": 360, "total_steps": 37885, "loss": 0.0852, "lr": 1.8949590921087357e-07, "epoch": 0.04751220799788835, "percentage": 0.95, "elapsed_time": "0:00:25", "remaining_time": "0:43:45", "throughput": 7061.91, "total_tokens": 177856} +{"current_steps": 365, "total_steps": 37885, "loss": 0.2049, "lr": 1.9213512800211137e-07, "epoch": 0.048172099775636794, "percentage": 0.96, "elapsed_time": "0:00:25", "remaining_time": "0:43:43", "throughput": 7059.68, "total_tokens": 180160} +{"current_steps": 370, "total_steps": 37885, "loss": 0.1772, "lr": 1.9477434679334917e-07, "epoch": 0.048831991553385246, "percentage": 0.98, "elapsed_time": "0:00:25", "remaining_time": "0:43:41", "throughput": 7068.65, "total_tokens": 182784} +{"current_steps": 375, "total_steps": 37885, "loss": 0.0558, "lr": 1.9741356558458697e-07, "epoch": 0.0494918833311337, "percentage": 0.99, "elapsed_time": "0:00:26", "remaining_time": "0:43:40", "throughput": 7063.16, "total_tokens": 185024} +{"current_steps": 380, "total_steps": 37885, "loss": 0.2301, "lr": 2.0005278437582474e-07, "epoch": 0.05015177510888214, "percentage": 1.0, "elapsed_time": "0:00:26", "remaining_time": "0:43:38", "throughput": 7074.88, "total_tokens": 187712} +{"current_steps": 385, "total_steps": 37885, "loss": 0.179, "lr": 2.0269200316706254e-07, "epoch": 0.050811666886630594, "percentage": 1.02, "elapsed_time": "0:00:26", "remaining_time": "0:43:37", "throughput": 7085.78, "total_tokens": 190400} +{"current_steps": 390, "total_steps": 37885, "loss": 0.1703, "lr": 2.0533122195830032e-07, "epoch": 0.05147155866437904, "percentage": 1.03, "elapsed_time": "0:00:27", "remaining_time": "0:43:36", "throughput": 7102.99, "total_tokens": 193280} +{"current_steps": 395, "total_steps": 37885, "loss": 0.238, "lr": 2.0797044074953812e-07, "epoch": 0.05213145044212749, "percentage": 1.04, "elapsed_time": "0:00:27", "remaining_time": "0:43:34", "throughput": 7100.58, "total_tokens": 195584} +{"current_steps": 400, "total_steps": 37885, "loss": 0.0807, "lr": 2.1060965954077591e-07, "epoch": 0.05279134221987594, "percentage": 1.06, "elapsed_time": "0:00:27", "remaining_time": "0:43:32", "throughput": 7109.02, "total_tokens": 198208} +{"current_steps": 405, "total_steps": 37885, "loss": 0.1271, "lr": 2.1324887833201371e-07, "epoch": 0.05345123399762439, "percentage": 1.07, "elapsed_time": "0:00:28", "remaining_time": "0:43:30", "throughput": 7114.86, "total_tokens": 200704} +{"current_steps": 410, "total_steps": 37885, "loss": 0.1709, "lr": 2.1588809712325151e-07, "epoch": 0.05411112577537284, "percentage": 1.08, "elapsed_time": "0:00:28", "remaining_time": "0:43:28", "throughput": 7118.96, "total_tokens": 203136} +{"current_steps": 415, "total_steps": 37885, "loss": 0.1371, "lr": 2.1852731591448931e-07, "epoch": 0.05477101755312129, "percentage": 1.1, "elapsed_time": "0:00:28", "remaining_time": "0:43:26", "throughput": 7119.25, "total_tokens": 205504} +{"current_steps": 420, "total_steps": 37885, "loss": 0.2497, "lr": 2.2116653470572711e-07, "epoch": 0.055430909330869735, "percentage": 1.11, "elapsed_time": "0:00:29", "remaining_time": "0:43:24", "throughput": 7124.61, "total_tokens": 208000} +{"current_steps": 425, "total_steps": 37885, "loss": 0.1593, "lr": 2.238057534969649e-07, "epoch": 0.056090801108618187, "percentage": 1.12, "elapsed_time": "0:00:29", "remaining_time": "0:43:22", "throughput": 7131.71, "total_tokens": 210560} +{"current_steps": 430, "total_steps": 37885, "loss": 0.2053, "lr": 2.2644497228820266e-07, "epoch": 0.05675069288636664, "percentage": 1.14, "elapsed_time": "0:00:29", "remaining_time": "0:43:20", "throughput": 7136.08, "total_tokens": 213056} +{"current_steps": 435, "total_steps": 37885, "loss": 0.1709, "lr": 2.2908419107944046e-07, "epoch": 0.05741058466411508, "percentage": 1.15, "elapsed_time": "0:00:30", "remaining_time": "0:43:18", "throughput": 7133.38, "total_tokens": 215296} +{"current_steps": 440, "total_steps": 37885, "loss": 0.0204, "lr": 2.3172340987067826e-07, "epoch": 0.058070476441863535, "percentage": 1.16, "elapsed_time": "0:00:30", "remaining_time": "0:43:16", "throughput": 7128.77, "total_tokens": 217472} +{"current_steps": 445, "total_steps": 37885, "loss": 0.0669, "lr": 2.3436262866191606e-07, "epoch": 0.058730368219611986, "percentage": 1.17, "elapsed_time": "0:00:30", "remaining_time": "0:43:14", "throughput": 7134.11, "total_tokens": 219968} +{"current_steps": 450, "total_steps": 37885, "loss": 0.1046, "lr": 2.3700184745315386e-07, "epoch": 0.05939025999736043, "percentage": 1.19, "elapsed_time": "0:00:31", "remaining_time": "0:43:12", "throughput": 7142.83, "total_tokens": 222592} +{"current_steps": 455, "total_steps": 37885, "loss": 0.1579, "lr": 2.3964106624439166e-07, "epoch": 0.06005015177510888, "percentage": 1.2, "elapsed_time": "0:00:31", "remaining_time": "0:43:10", "throughput": 7138.21, "total_tokens": 224768} +{"current_steps": 460, "total_steps": 37885, "loss": 0.238, "lr": 2.4228028503562943e-07, "epoch": 0.060710043552857335, "percentage": 1.21, "elapsed_time": "0:00:31", "remaining_time": "0:43:08", "throughput": 7142.44, "total_tokens": 227264} +{"current_steps": 465, "total_steps": 37885, "loss": 0.0656, "lr": 2.4491950382686726e-07, "epoch": 0.06136993533060578, "percentage": 1.23, "elapsed_time": "0:00:32", "remaining_time": "0:43:06", "throughput": 7147.37, "total_tokens": 229760} +{"current_steps": 470, "total_steps": 37885, "loss": 0.2998, "lr": 2.4755872261810503e-07, "epoch": 0.06202982710835423, "percentage": 1.24, "elapsed_time": "0:00:32", "remaining_time": "0:43:04", "throughput": 7144.9, "total_tokens": 232000} +{"current_steps": 475, "total_steps": 37885, "loss": 0.2337, "lr": 2.501979414093428e-07, "epoch": 0.06268971888610268, "percentage": 1.25, "elapsed_time": "0:00:32", "remaining_time": "0:43:02", "throughput": 7140.36, "total_tokens": 234176} +{"current_steps": 480, "total_steps": 37885, "loss": 0.1524, "lr": 2.528371602005806e-07, "epoch": 0.06334961066385113, "percentage": 1.27, "elapsed_time": "0:00:33", "remaining_time": "0:43:01", "throughput": 7146.84, "total_tokens": 236736} +{"current_steps": 485, "total_steps": 37885, "loss": 0.0146, "lr": 2.554763789918184e-07, "epoch": 0.06400950244159957, "percentage": 1.28, "elapsed_time": "0:00:33", "remaining_time": "0:42:59", "throughput": 7148.07, "total_tokens": 239104} +{"current_steps": 490, "total_steps": 37885, "loss": 0.4542, "lr": 2.581155977830562e-07, "epoch": 0.06466939421934803, "percentage": 1.29, "elapsed_time": "0:00:33", "remaining_time": "0:42:57", "throughput": 7152.45, "total_tokens": 241600} +{"current_steps": 495, "total_steps": 37885, "loss": 0.5945, "lr": 2.60754816574294e-07, "epoch": 0.06532928599709648, "percentage": 1.31, "elapsed_time": "0:00:34", "remaining_time": "0:42:56", "throughput": 7158.65, "total_tokens": 244160} +{"current_steps": 500, "total_steps": 37885, "loss": 0.3712, "lr": 2.633940353655318e-07, "epoch": 0.06598917777484492, "percentage": 1.32, "elapsed_time": "0:00:34", "remaining_time": "0:42:54", "throughput": 7157.58, "total_tokens": 246464} +{"current_steps": 505, "total_steps": 37885, "loss": 0.1548, "lr": 2.660332541567696e-07, "epoch": 0.06664906955259338, "percentage": 1.33, "elapsed_time": "0:00:34", "remaining_time": "0:42:53", "throughput": 7161.61, "total_tokens": 248960} +{"current_steps": 510, "total_steps": 37885, "loss": 0.1575, "lr": 2.686724729480074e-07, "epoch": 0.06730896133034182, "percentage": 1.35, "elapsed_time": "0:00:35", "remaining_time": "0:42:51", "throughput": 7164.56, "total_tokens": 251392} +{"current_steps": 515, "total_steps": 37885, "loss": 0.1289, "lr": 2.7131169173924515e-07, "epoch": 0.06796885310809027, "percentage": 1.36, "elapsed_time": "0:00:35", "remaining_time": "0:42:49", "throughput": 7168.52, "total_tokens": 253888} +{"current_steps": 520, "total_steps": 37885, "loss": 0.1912, "lr": 2.73950910530483e-07, "epoch": 0.06862874488583873, "percentage": 1.37, "elapsed_time": "0:00:35", "remaining_time": "0:42:48", "throughput": 7172.36, "total_tokens": 256384} +{"current_steps": 525, "total_steps": 37885, "loss": 0.2614, "lr": 2.7659012932172075e-07, "epoch": 0.06928863666358717, "percentage": 1.39, "elapsed_time": "0:00:36", "remaining_time": "0:42:47", "throughput": 7165.85, "total_tokens": 258496} +{"current_steps": 530, "total_steps": 37885, "loss": 0.1036, "lr": 2.792293481129586e-07, "epoch": 0.06994852844133562, "percentage": 1.4, "elapsed_time": "0:00:36", "remaining_time": "0:42:45", "throughput": 7166.59, "total_tokens": 260864} +{"current_steps": 535, "total_steps": 37885, "loss": 0.0465, "lr": 2.8186856690419635e-07, "epoch": 0.07060842021908408, "percentage": 1.41, "elapsed_time": "0:00:36", "remaining_time": "0:42:44", "throughput": 7170.15, "total_tokens": 263360} +{"current_steps": 540, "total_steps": 37885, "loss": 0.097, "lr": 2.845077856954342e-07, "epoch": 0.07126831199683252, "percentage": 1.43, "elapsed_time": "0:00:37", "remaining_time": "0:42:43", "throughput": 7179.82, "total_tokens": 266112} +{"current_steps": 545, "total_steps": 37885, "loss": 0.1353, "lr": 2.8714700448667195e-07, "epoch": 0.07192820377458096, "percentage": 1.44, "elapsed_time": "0:00:37", "remaining_time": "0:42:41", "throughput": 7182.05, "total_tokens": 268544} +{"current_steps": 550, "total_steps": 37885, "loss": 0.2927, "lr": 2.897862232779097e-07, "epoch": 0.07258809555232942, "percentage": 1.45, "elapsed_time": "0:00:37", "remaining_time": "0:42:40", "throughput": 7183.01, "total_tokens": 270912} +{"current_steps": 555, "total_steps": 37885, "loss": 0.042, "lr": 2.9242544206914755e-07, "epoch": 0.07324798733007787, "percentage": 1.46, "elapsed_time": "0:00:38", "remaining_time": "0:42:39", "throughput": 7192.34, "total_tokens": 273664} +{"current_steps": 560, "total_steps": 37885, "loss": 0.1713, "lr": 2.950646608603853e-07, "epoch": 0.07390787910782631, "percentage": 1.48, "elapsed_time": "0:00:38", "remaining_time": "0:42:37", "throughput": 7196.13, "total_tokens": 276160} +{"current_steps": 565, "total_steps": 37885, "loss": 0.1153, "lr": 2.977038796516231e-07, "epoch": 0.07456777088557477, "percentage": 1.49, "elapsed_time": "0:00:38", "remaining_time": "0:42:36", "throughput": 7202.85, "total_tokens": 278784} +{"current_steps": 570, "total_steps": 37885, "loss": 0.1525, "lr": 3.0034309844286087e-07, "epoch": 0.07522766266332322, "percentage": 1.5, "elapsed_time": "0:00:39", "remaining_time": "0:42:35", "throughput": 7203.58, "total_tokens": 281152} +{"current_steps": 575, "total_steps": 37885, "loss": 0.2213, "lr": 3.029823172340987e-07, "epoch": 0.07588755444107166, "percentage": 1.52, "elapsed_time": "0:00:39", "remaining_time": "0:42:33", "throughput": 7202.74, "total_tokens": 283456} +{"current_steps": 580, "total_steps": 37885, "loss": 0.1509, "lr": 3.0562153602533647e-07, "epoch": 0.07654744621882012, "percentage": 1.53, "elapsed_time": "0:00:39", "remaining_time": "0:42:32", "throughput": 7207.14, "total_tokens": 286016} +{"current_steps": 585, "total_steps": 37885, "loss": 0.1542, "lr": 3.0826075481657424e-07, "epoch": 0.07720733799656856, "percentage": 1.54, "elapsed_time": "0:00:40", "remaining_time": "0:42:31", "throughput": 7209.31, "total_tokens": 288448} +{"current_steps": 590, "total_steps": 37885, "loss": 0.136, "lr": 3.1089997360781207e-07, "epoch": 0.07786722977431701, "percentage": 1.56, "elapsed_time": "0:00:40", "remaining_time": "0:42:29", "throughput": 7209.91, "total_tokens": 290816} +{"current_steps": 595, "total_steps": 37885, "loss": 0.0768, "lr": 3.1353919239904984e-07, "epoch": 0.07852712155206547, "percentage": 1.57, "elapsed_time": "0:00:40", "remaining_time": "0:42:28", "throughput": 7217.18, "total_tokens": 293504} +{"current_steps": 600, "total_steps": 37885, "loss": 0.1251, "lr": 3.1617841119028767e-07, "epoch": 0.07918701332981391, "percentage": 1.58, "elapsed_time": "0:00:40", "remaining_time": "0:42:27", "throughput": 7214.71, "total_tokens": 295744} +{"current_steps": 605, "total_steps": 37885, "loss": 0.1154, "lr": 3.1881762998152544e-07, "epoch": 0.07984690510756236, "percentage": 1.6, "elapsed_time": "0:00:41", "remaining_time": "0:42:26", "throughput": 7214.84, "total_tokens": 298112} +{"current_steps": 610, "total_steps": 37885, "loss": 0.1348, "lr": 3.2145684877276327e-07, "epoch": 0.0805067968853108, "percentage": 1.61, "elapsed_time": "0:00:41", "remaining_time": "0:42:24", "throughput": 7217.89, "total_tokens": 300608} +{"current_steps": 615, "total_steps": 37885, "loss": 0.0238, "lr": 3.2409606756400104e-07, "epoch": 0.08116668866305926, "percentage": 1.62, "elapsed_time": "0:00:41", "remaining_time": "0:42:24", "throughput": 7226.04, "total_tokens": 303360} +{"current_steps": 620, "total_steps": 37885, "loss": 0.1818, "lr": 3.267352863552388e-07, "epoch": 0.0818265804408077, "percentage": 1.64, "elapsed_time": "0:00:42", "remaining_time": "0:42:23", "throughput": 7230.5, "total_tokens": 305920} +{"current_steps": 625, "total_steps": 37885, "loss": 0.1448, "lr": 3.2937450514647664e-07, "epoch": 0.08248647221855615, "percentage": 1.65, "elapsed_time": "0:00:42", "remaining_time": "0:42:21", "throughput": 7233.47, "total_tokens": 308416} +{"current_steps": 630, "total_steps": 37885, "loss": 0.0859, "lr": 3.320137239377144e-07, "epoch": 0.08314636399630461, "percentage": 1.66, "elapsed_time": "0:00:42", "remaining_time": "0:42:20", "throughput": 7235.24, "total_tokens": 310848} +{"current_steps": 635, "total_steps": 37885, "loss": 0.1894, "lr": 3.3465294272895224e-07, "epoch": 0.08380625577405305, "percentage": 1.68, "elapsed_time": "0:00:43", "remaining_time": "0:42:19", "throughput": 7239.54, "total_tokens": 313408} +{"current_steps": 640, "total_steps": 37885, "loss": 0.2527, "lr": 3.3729216152019e-07, "epoch": 0.0844661475518015, "percentage": 1.69, "elapsed_time": "0:00:43", "remaining_time": "0:42:18", "throughput": 7241.95, "total_tokens": 315904} +{"current_steps": 645, "total_steps": 37885, "loss": 0.2999, "lr": 3.3993138031142784e-07, "epoch": 0.08512603932954996, "percentage": 1.7, "elapsed_time": "0:00:43", "remaining_time": "0:42:17", "throughput": 7238.14, "total_tokens": 318080} +{"current_steps": 650, "total_steps": 37885, "loss": 0.1142, "lr": 3.425705991026656e-07, "epoch": 0.0857859311072984, "percentage": 1.72, "elapsed_time": "0:00:44", "remaining_time": "0:42:15", "throughput": 7234.24, "total_tokens": 320256} +{"current_steps": 655, "total_steps": 37885, "loss": 0.0054, "lr": 3.452098178939034e-07, "epoch": 0.08644582288504685, "percentage": 1.73, "elapsed_time": "0:00:44", "remaining_time": "0:42:14", "throughput": 7231.76, "total_tokens": 322496} +{"current_steps": 660, "total_steps": 37885, "loss": 0.133, "lr": 3.478490366851412e-07, "epoch": 0.0871057146627953, "percentage": 1.74, "elapsed_time": "0:00:44", "remaining_time": "0:42:13", "throughput": 7236.78, "total_tokens": 325120} +{"current_steps": 665, "total_steps": 37885, "loss": 0.0938, "lr": 3.50488255476379e-07, "epoch": 0.08776560644054375, "percentage": 1.76, "elapsed_time": "0:00:45", "remaining_time": "0:42:12", "throughput": 7232.71, "total_tokens": 327296} +{"current_steps": 670, "total_steps": 37885, "loss": 0.492, "lr": 3.5312747426761676e-07, "epoch": 0.0884254982182922, "percentage": 1.77, "elapsed_time": "0:00:45", "remaining_time": "0:42:11", "throughput": 7233.1, "total_tokens": 329664} +{"current_steps": 675, "total_steps": 37885, "loss": 0.2004, "lr": 3.5576669305885453e-07, "epoch": 0.08908538999604065, "percentage": 1.78, "elapsed_time": "0:00:45", "remaining_time": "0:42:11", "throughput": 7240.04, "total_tokens": 332416} +{"current_steps": 680, "total_steps": 37885, "loss": 0.0255, "lr": 3.5840591185009236e-07, "epoch": 0.0897452817737891, "percentage": 1.79, "elapsed_time": "0:00:46", "remaining_time": "0:42:10", "throughput": 7244.0, "total_tokens": 334976} +{"current_steps": 685, "total_steps": 37885, "loss": 0.1744, "lr": 3.6104513064133013e-07, "epoch": 0.09040517355153754, "percentage": 1.81, "elapsed_time": "0:00:46", "remaining_time": "0:42:09", "throughput": 7246.64, "total_tokens": 337472} +{"current_steps": 690, "total_steps": 37885, "loss": 0.0702, "lr": 3.636843494325679e-07, "epoch": 0.091065065329286, "percentage": 1.82, "elapsed_time": "0:00:46", "remaining_time": "0:42:08", "throughput": 7247.86, "total_tokens": 339904} +{"current_steps": 695, "total_steps": 37885, "loss": 0.1571, "lr": 3.6632356822380573e-07, "epoch": 0.09172495710703445, "percentage": 1.83, "elapsed_time": "0:00:47", "remaining_time": "0:42:07", "throughput": 7247.26, "total_tokens": 342272} +{"current_steps": 700, "total_steps": 37885, "loss": 0.0945, "lr": 3.689627870150435e-07, "epoch": 0.09238484888478289, "percentage": 1.85, "elapsed_time": "0:00:47", "remaining_time": "0:42:06", "throughput": 7247.58, "total_tokens": 344640} +{"current_steps": 705, "total_steps": 37885, "loss": 0.1118, "lr": 3.7160200580628133e-07, "epoch": 0.09304474066253135, "percentage": 1.86, "elapsed_time": "0:00:47", "remaining_time": "0:42:05", "throughput": 7248.8, "total_tokens": 347072} +{"current_steps": 710, "total_steps": 37885, "loss": 0.1896, "lr": 3.742412245975191e-07, "epoch": 0.0937046324402798, "percentage": 1.87, "elapsed_time": "0:00:48", "remaining_time": "0:42:04", "throughput": 7253.26, "total_tokens": 349696} +{"current_steps": 715, "total_steps": 37885, "loss": 0.1602, "lr": 3.7688044338875693e-07, "epoch": 0.09436452421802824, "percentage": 1.89, "elapsed_time": "0:00:48", "remaining_time": "0:42:03", "throughput": 7256.55, "total_tokens": 352256} +{"current_steps": 720, "total_steps": 37885, "loss": 0.2044, "lr": 3.795196621799947e-07, "epoch": 0.0950244159957767, "percentage": 1.9, "elapsed_time": "0:00:48", "remaining_time": "0:42:02", "throughput": 7263.44, "total_tokens": 355008} +{"current_steps": 725, "total_steps": 37885, "loss": 0.4465, "lr": 3.821588809712325e-07, "epoch": 0.09568430777352514, "percentage": 1.91, "elapsed_time": "0:00:49", "remaining_time": "0:42:01", "throughput": 7263.2, "total_tokens": 357376} +{"current_steps": 730, "total_steps": 37885, "loss": 0.2807, "lr": 3.847980997624703e-07, "epoch": 0.09634419955127359, "percentage": 1.93, "elapsed_time": "0:00:49", "remaining_time": "0:42:00", "throughput": 7262.29, "total_tokens": 359680} +{"current_steps": 735, "total_steps": 37885, "loss": 0.0482, "lr": 3.874373185537081e-07, "epoch": 0.09700409132902205, "percentage": 1.94, "elapsed_time": "0:00:49", "remaining_time": "0:41:59", "throughput": 7264.54, "total_tokens": 362176} +{"current_steps": 740, "total_steps": 37885, "loss": 0.0361, "lr": 3.900765373449459e-07, "epoch": 0.09766398310677049, "percentage": 1.95, "elapsed_time": "0:00:50", "remaining_time": "0:41:59", "throughput": 7273.04, "total_tokens": 365056} +{"current_steps": 745, "total_steps": 37885, "loss": 0.1913, "lr": 3.927157561361837e-07, "epoch": 0.09832387488451894, "percentage": 1.97, "elapsed_time": "0:00:50", "remaining_time": "0:41:58", "throughput": 7273.89, "total_tokens": 367488} +{"current_steps": 750, "total_steps": 37885, "loss": 0.2768, "lr": 3.953549749274215e-07, "epoch": 0.0989837666622674, "percentage": 1.98, "elapsed_time": "0:00:50", "remaining_time": "0:41:57", "throughput": 7272.62, "total_tokens": 369792} +{"current_steps": 755, "total_steps": 37885, "loss": 0.0962, "lr": 3.979941937186593e-07, "epoch": 0.09964365844001584, "percentage": 1.99, "elapsed_time": "0:00:51", "remaining_time": "0:41:56", "throughput": 7272.38, "total_tokens": 372160} +{"current_steps": 760, "total_steps": 37885, "loss": 0.1613, "lr": 4.0063341250989705e-07, "epoch": 0.10030355021776428, "percentage": 2.01, "elapsed_time": "0:00:51", "remaining_time": "0:41:55", "throughput": 7267.62, "total_tokens": 374272} +{"current_steps": 765, "total_steps": 37885, "loss": 0.1893, "lr": 4.032726313011349e-07, "epoch": 0.10096344199551274, "percentage": 2.02, "elapsed_time": "0:00:51", "remaining_time": "0:41:54", "throughput": 7268.51, "total_tokens": 376704} +{"current_steps": 770, "total_steps": 37885, "loss": 0.321, "lr": 4.0591185009237265e-07, "epoch": 0.10162333377326119, "percentage": 2.03, "elapsed_time": "0:00:52", "remaining_time": "0:41:53", "throughput": 7269.39, "total_tokens": 379136} +{"current_steps": 775, "total_steps": 37885, "loss": 0.0868, "lr": 4.085510688836104e-07, "epoch": 0.10228322555100963, "percentage": 2.05, "elapsed_time": "0:00:52", "remaining_time": "0:41:53", "throughput": 7270.39, "total_tokens": 381568} +{"current_steps": 780, "total_steps": 37885, "loss": 0.2393, "lr": 4.111902876748482e-07, "epoch": 0.10294311732875808, "percentage": 2.06, "elapsed_time": "0:00:52", "remaining_time": "0:41:52", "throughput": 7273.34, "total_tokens": 384128} +{"current_steps": 785, "total_steps": 37885, "loss": 0.3035, "lr": 4.13829506466086e-07, "epoch": 0.10360300910650654, "percentage": 2.07, "elapsed_time": "0:00:53", "remaining_time": "0:41:51", "throughput": 7269.37, "total_tokens": 386304} +{"current_steps": 790, "total_steps": 37885, "loss": 0.2872, "lr": 4.164687252573238e-07, "epoch": 0.10426290088425498, "percentage": 2.09, "elapsed_time": "0:00:53", "remaining_time": "0:41:50", "throughput": 7272.68, "total_tokens": 388864} +{"current_steps": 795, "total_steps": 37885, "loss": 0.0753, "lr": 4.1910794404856157e-07, "epoch": 0.10492279266200343, "percentage": 2.1, "elapsed_time": "0:00:53", "remaining_time": "0:41:49", "throughput": 7270.02, "total_tokens": 391104} +{"current_steps": 800, "total_steps": 37885, "loss": 0.1116, "lr": 4.217471628397994e-07, "epoch": 0.10558268443975188, "percentage": 2.11, "elapsed_time": "0:00:54", "remaining_time": "0:41:49", "throughput": 7276.05, "total_tokens": 393856} +{"current_steps": 805, "total_steps": 37885, "loss": 0.1934, "lr": 4.2438638163103717e-07, "epoch": 0.10624257621750033, "percentage": 2.12, "elapsed_time": "0:00:54", "remaining_time": "0:41:48", "throughput": 7277.72, "total_tokens": 396352} +{"current_steps": 810, "total_steps": 37885, "loss": 0.1459, "lr": 4.27025600422275e-07, "epoch": 0.10690246799524877, "percentage": 2.14, "elapsed_time": "0:00:54", "remaining_time": "0:41:47", "throughput": 7275.05, "total_tokens": 398592} +{"current_steps": 815, "total_steps": 37885, "loss": 0.0887, "lr": 4.2966481921351277e-07, "epoch": 0.10756235977299723, "percentage": 2.15, "elapsed_time": "0:00:55", "remaining_time": "0:41:46", "throughput": 7277.01, "total_tokens": 401088} +{"current_steps": 820, "total_steps": 37885, "loss": 0.0521, "lr": 4.323040380047506e-07, "epoch": 0.10822225155074568, "percentage": 2.16, "elapsed_time": "0:00:55", "remaining_time": "0:41:46", "throughput": 7276.75, "total_tokens": 403456} +{"current_steps": 825, "total_steps": 37885, "loss": 0.2846, "lr": 4.3494325679598837e-07, "epoch": 0.10888214332849412, "percentage": 2.18, "elapsed_time": "0:00:55", "remaining_time": "0:41:45", "throughput": 7274.46, "total_tokens": 405696} +{"current_steps": 830, "total_steps": 37885, "loss": 0.2486, "lr": 4.3758247558722614e-07, "epoch": 0.10954203510624258, "percentage": 2.19, "elapsed_time": "0:00:56", "remaining_time": "0:41:44", "throughput": 7275.36, "total_tokens": 408128} +{"current_steps": 835, "total_steps": 37885, "loss": 0.0355, "lr": 4.4022169437846397e-07, "epoch": 0.11020192688399102, "percentage": 2.2, "elapsed_time": "0:00:56", "remaining_time": "0:41:44", "throughput": 7283.03, "total_tokens": 411008} +{"current_steps": 840, "total_steps": 37885, "loss": 0.3422, "lr": 4.4286091316970174e-07, "epoch": 0.11086181866173947, "percentage": 2.22, "elapsed_time": "0:00:56", "remaining_time": "0:41:43", "throughput": 7281.94, "total_tokens": 413312} +{"current_steps": 845, "total_steps": 37885, "loss": 0.124, "lr": 4.4550013196093957e-07, "epoch": 0.11152171043948793, "percentage": 2.23, "elapsed_time": "0:00:57", "remaining_time": "0:41:42", "throughput": 7280.99, "total_tokens": 415616} +{"current_steps": 850, "total_steps": 37885, "loss": 0.1853, "lr": 4.4813935075217734e-07, "epoch": 0.11218160221723637, "percentage": 2.24, "elapsed_time": "0:00:57", "remaining_time": "0:41:41", "throughput": 7286.5, "total_tokens": 418368} +{"current_steps": 855, "total_steps": 37885, "loss": 0.0833, "lr": 4.5077856954341517e-07, "epoch": 0.11284149399498482, "percentage": 2.26, "elapsed_time": "0:00:57", "remaining_time": "0:41:40", "throughput": 7288.38, "total_tokens": 420864} +{"current_steps": 860, "total_steps": 37885, "loss": 0.222, "lr": 4.5341778833465294e-07, "epoch": 0.11350138577273328, "percentage": 2.27, "elapsed_time": "0:00:58", "remaining_time": "0:41:40", "throughput": 7285.03, "total_tokens": 423040} +{"current_steps": 865, "total_steps": 37885, "loss": 0.3516, "lr": 4.560570071258907e-07, "epoch": 0.11416127755048172, "percentage": 2.28, "elapsed_time": "0:00:58", "remaining_time": "0:41:39", "throughput": 7284.03, "total_tokens": 425344} +{"current_steps": 870, "total_steps": 37885, "loss": 0.1296, "lr": 4.5869622591712854e-07, "epoch": 0.11482116932823017, "percentage": 2.3, "elapsed_time": "0:00:58", "remaining_time": "0:41:38", "throughput": 7287.72, "total_tokens": 427968} +{"current_steps": 875, "total_steps": 37885, "loss": 0.232, "lr": 4.613354447083663e-07, "epoch": 0.11548106110597862, "percentage": 2.31, "elapsed_time": "0:00:59", "remaining_time": "0:41:37", "throughput": 7291.15, "total_tokens": 430592} +{"current_steps": 880, "total_steps": 37885, "loss": 0.4212, "lr": 4.639746634996041e-07, "epoch": 0.11614095288372707, "percentage": 2.32, "elapsed_time": "0:00:59", "remaining_time": "0:41:37", "throughput": 7294.04, "total_tokens": 433152} +{"current_steps": 885, "total_steps": 37885, "loss": 0.0185, "lr": 4.6661388229084186e-07, "epoch": 0.11680084466147551, "percentage": 2.34, "elapsed_time": "0:00:59", "remaining_time": "0:41:36", "throughput": 7292.88, "total_tokens": 435456} +{"current_steps": 890, "total_steps": 37885, "loss": 0.2316, "lr": 4.692531010820797e-07, "epoch": 0.11746073643922397, "percentage": 2.35, "elapsed_time": "0:01:00", "remaining_time": "0:41:35", "throughput": 7296.26, "total_tokens": 438080} +{"current_steps": 895, "total_steps": 37885, "loss": 0.2135, "lr": 4.7189231987331746e-07, "epoch": 0.11812062821697242, "percentage": 2.36, "elapsed_time": "0:01:00", "remaining_time": "0:41:35", "throughput": 7298.89, "total_tokens": 440640} +{"current_steps": 900, "total_steps": 37885, "loss": 0.0402, "lr": 4.7453153866455523e-07, "epoch": 0.11878051999472086, "percentage": 2.38, "elapsed_time": "0:01:00", "remaining_time": "0:41:34", "throughput": 7298.96, "total_tokens": 443008} +{"current_steps": 905, "total_steps": 37885, "loss": 0.1973, "lr": 4.771707574557931e-07, "epoch": 0.11944041177246932, "percentage": 2.39, "elapsed_time": "0:01:01", "remaining_time": "0:41:33", "throughput": 7297.74, "total_tokens": 445312} +{"current_steps": 910, "total_steps": 37885, "loss": 0.105, "lr": 4.798099762470308e-07, "epoch": 0.12010030355021777, "percentage": 2.4, "elapsed_time": "0:01:01", "remaining_time": "0:41:32", "throughput": 7297.65, "total_tokens": 447680} +{"current_steps": 915, "total_steps": 37885, "loss": 0.1353, "lr": 4.824491950382686e-07, "epoch": 0.12076019532796621, "percentage": 2.42, "elapsed_time": "0:01:01", "remaining_time": "0:41:32", "throughput": 7301.9, "total_tokens": 450368} +{"current_steps": 920, "total_steps": 37885, "loss": 0.0864, "lr": 4.850884138295065e-07, "epoch": 0.12142008710571467, "percentage": 2.43, "elapsed_time": "0:01:02", "remaining_time": "0:41:31", "throughput": 7302.46, "total_tokens": 452800} +{"current_steps": 925, "total_steps": 37885, "loss": 0.1168, "lr": 4.877276326207443e-07, "epoch": 0.12207997888346311, "percentage": 2.44, "elapsed_time": "0:01:02", "remaining_time": "0:41:31", "throughput": 7310.22, "total_tokens": 455744} +{"current_steps": 930, "total_steps": 37885, "loss": 0.1899, "lr": 4.90366851411982e-07, "epoch": 0.12273987066121156, "percentage": 2.45, "elapsed_time": "0:01:02", "remaining_time": "0:41:30", "throughput": 7310.75, "total_tokens": 458176} +{"current_steps": 935, "total_steps": 37885, "loss": 0.1902, "lr": 4.930060702032198e-07, "epoch": 0.12339976243896002, "percentage": 2.47, "elapsed_time": "0:01:02", "remaining_time": "0:41:29", "throughput": 7309.7, "total_tokens": 460480} +{"current_steps": 940, "total_steps": 37885, "loss": 0.169, "lr": 4.956452889944576e-07, "epoch": 0.12405965421670846, "percentage": 2.48, "elapsed_time": "0:01:03", "remaining_time": "0:41:28", "throughput": 7306.69, "total_tokens": 462656} +{"current_steps": 945, "total_steps": 37885, "loss": 0.2648, "lr": 4.982845077856955e-07, "epoch": 0.1247195459944569, "percentage": 2.49, "elapsed_time": "0:01:03", "remaining_time": "0:41:28", "throughput": 7310.85, "total_tokens": 465344} +{"current_steps": 950, "total_steps": 37885, "loss": 0.3667, "lr": 5.009237265769331e-07, "epoch": 0.12537943777220537, "percentage": 2.51, "elapsed_time": "0:01:03", "remaining_time": "0:41:27", "throughput": 7310.54, "total_tokens": 467712} +{"current_steps": 955, "total_steps": 37885, "loss": 0.2885, "lr": 5.03562945368171e-07, "epoch": 0.1260393295499538, "percentage": 2.52, "elapsed_time": "0:01:04", "remaining_time": "0:41:26", "throughput": 7310.49, "total_tokens": 470080} +{"current_steps": 960, "total_steps": 37885, "loss": 0.1084, "lr": 5.062021641594088e-07, "epoch": 0.12669922132770225, "percentage": 2.53, "elapsed_time": "0:01:04", "remaining_time": "0:41:25", "throughput": 7311.11, "total_tokens": 472512} +{"current_steps": 965, "total_steps": 37885, "loss": 0.1628, "lr": 5.088413829506465e-07, "epoch": 0.1273591131054507, "percentage": 2.55, "elapsed_time": "0:01:04", "remaining_time": "0:41:25", "throughput": 7309.17, "total_tokens": 474752} +{"current_steps": 970, "total_steps": 37885, "loss": 0.0777, "lr": 5.114806017418843e-07, "epoch": 0.12801900488319914, "percentage": 2.56, "elapsed_time": "0:01:05", "remaining_time": "0:41:24", "throughput": 7312.87, "total_tokens": 477440} +{"current_steps": 975, "total_steps": 37885, "loss": 0.1169, "lr": 5.141198205331222e-07, "epoch": 0.12867889666094762, "percentage": 2.57, "elapsed_time": "0:01:05", "remaining_time": "0:41:24", "throughput": 7314.9, "total_tokens": 480000} +{"current_steps": 980, "total_steps": 37885, "loss": 0.2261, "lr": 5.1675903932436e-07, "epoch": 0.12933878843869606, "percentage": 2.59, "elapsed_time": "0:01:05", "remaining_time": "0:41:23", "throughput": 7314.42, "total_tokens": 482368} +{"current_steps": 985, "total_steps": 37885, "loss": 0.0063, "lr": 5.193982581155977e-07, "epoch": 0.1299986802164445, "percentage": 2.6, "elapsed_time": "0:01:06", "remaining_time": "0:41:22", "throughput": 7310.75, "total_tokens": 484544} +{"current_steps": 990, "total_steps": 37885, "loss": 0.1087, "lr": 5.220374769068355e-07, "epoch": 0.13065857199419295, "percentage": 2.61, "elapsed_time": "0:01:06", "remaining_time": "0:41:22", "throughput": 7307.81, "total_tokens": 486720} +{"current_steps": 995, "total_steps": 37885, "loss": 0.0935, "lr": 5.246766956980734e-07, "epoch": 0.1313184637719414, "percentage": 2.63, "elapsed_time": "0:01:06", "remaining_time": "0:41:21", "throughput": 7310.74, "total_tokens": 489344} +{"current_steps": 1000, "total_steps": 37885, "loss": 0.0785, "lr": 5.273159144893111e-07, "epoch": 0.13197835554968984, "percentage": 2.64, "elapsed_time": "0:01:07", "remaining_time": "0:41:21", "throughput": 7311.2, "total_tokens": 491776} +{"current_steps": 1005, "total_steps": 37885, "loss": 0.1646, "lr": 5.29955133280549e-07, "epoch": 0.1326382473274383, "percentage": 2.65, "elapsed_time": "0:01:07", "remaining_time": "0:41:20", "throughput": 7308.09, "total_tokens": 493952} +{"current_steps": 1010, "total_steps": 37885, "loss": 0.2601, "lr": 5.325943520717867e-07, "epoch": 0.13329813910518676, "percentage": 2.67, "elapsed_time": "0:01:07", "remaining_time": "0:41:19", "throughput": 7307.46, "total_tokens": 496320} +{"current_steps": 1015, "total_steps": 37885, "loss": 0.075, "lr": 5.352335708630246e-07, "epoch": 0.1339580308829352, "percentage": 2.68, "elapsed_time": "0:01:08", "remaining_time": "0:41:19", "throughput": 7304.33, "total_tokens": 498496} +{"current_steps": 1020, "total_steps": 37885, "loss": 0.3792, "lr": 5.378727896542623e-07, "epoch": 0.13461792266068365, "percentage": 2.69, "elapsed_time": "0:01:08", "remaining_time": "0:41:18", "throughput": 7306.53, "total_tokens": 501056} +{"current_steps": 1025, "total_steps": 37885, "loss": 0.0633, "lr": 5.405120084455001e-07, "epoch": 0.1352778144384321, "percentage": 2.71, "elapsed_time": "0:01:08", "remaining_time": "0:41:17", "throughput": 7304.66, "total_tokens": 503296} +{"current_steps": 1030, "total_steps": 37885, "loss": 0.2425, "lr": 5.431512272367379e-07, "epoch": 0.13593770621618054, "percentage": 2.72, "elapsed_time": "0:01:09", "remaining_time": "0:41:17", "throughput": 7307.14, "total_tokens": 505856} +{"current_steps": 1035, "total_steps": 37885, "loss": 0.2077, "lr": 5.457904460279758e-07, "epoch": 0.13659759799392898, "percentage": 2.73, "elapsed_time": "0:01:09", "remaining_time": "0:41:16", "throughput": 7309.32, "total_tokens": 508416} +{"current_steps": 1040, "total_steps": 37885, "loss": 0.1821, "lr": 5.484296648192135e-07, "epoch": 0.13725748977167745, "percentage": 2.75, "elapsed_time": "0:01:09", "remaining_time": "0:41:16", "throughput": 7311.93, "total_tokens": 511040} +{"current_steps": 1045, "total_steps": 37885, "loss": 0.0126, "lr": 5.510688836104512e-07, "epoch": 0.1379173815494259, "percentage": 2.76, "elapsed_time": "0:01:10", "remaining_time": "0:41:15", "throughput": 7309.03, "total_tokens": 513216} +{"current_steps": 1050, "total_steps": 37885, "loss": 0.2747, "lr": 5.537081024016891e-07, "epoch": 0.13857727332717434, "percentage": 2.77, "elapsed_time": "0:01:10", "remaining_time": "0:41:14", "throughput": 7307.2, "total_tokens": 515456} +{"current_steps": 1055, "total_steps": 37885, "loss": 0.2289, "lr": 5.563473211929268e-07, "epoch": 0.1392371651049228, "percentage": 2.78, "elapsed_time": "0:01:10", "remaining_time": "0:41:14", "throughput": 7306.71, "total_tokens": 517824} +{"current_steps": 1060, "total_steps": 37885, "loss": 0.1583, "lr": 5.589865399841647e-07, "epoch": 0.13989705688267123, "percentage": 2.8, "elapsed_time": "0:01:11", "remaining_time": "0:41:13", "throughput": 7308.03, "total_tokens": 520320} +{"current_steps": 1065, "total_steps": 37885, "loss": 0.0417, "lr": 5.616257587754024e-07, "epoch": 0.14055694866041968, "percentage": 2.81, "elapsed_time": "0:01:11", "remaining_time": "0:41:12", "throughput": 7308.65, "total_tokens": 522752} +{"current_steps": 1070, "total_steps": 37885, "loss": 0.0679, "lr": 5.642649775666402e-07, "epoch": 0.14121684043816815, "percentage": 2.82, "elapsed_time": "0:01:11", "remaining_time": "0:41:12", "throughput": 7311.26, "total_tokens": 525376} +{"current_steps": 1075, "total_steps": 37885, "loss": 0.2223, "lr": 5.66904196357878e-07, "epoch": 0.1418767322159166, "percentage": 2.84, "elapsed_time": "0:01:12", "remaining_time": "0:41:11", "throughput": 7311.97, "total_tokens": 527808} +{"current_steps": 1080, "total_steps": 37885, "loss": 0.1876, "lr": 5.695434151491159e-07, "epoch": 0.14253662399366504, "percentage": 2.85, "elapsed_time": "0:01:12", "remaining_time": "0:41:11", "throughput": 7313.37, "total_tokens": 530304} +{"current_steps": 1085, "total_steps": 37885, "loss": 0.375, "lr": 5.721826339403536e-07, "epoch": 0.14319651577141349, "percentage": 2.86, "elapsed_time": "0:01:12", "remaining_time": "0:41:10", "throughput": 7310.34, "total_tokens": 532480} +{"current_steps": 1090, "total_steps": 37885, "loss": 0.2028, "lr": 5.748218527315914e-07, "epoch": 0.14385640754916193, "percentage": 2.88, "elapsed_time": "0:01:13", "remaining_time": "0:41:09", "throughput": 7314.23, "total_tokens": 535168} +{"current_steps": 1095, "total_steps": 37885, "loss": 0.3046, "lr": 5.774610715228292e-07, "epoch": 0.14451629932691037, "percentage": 2.89, "elapsed_time": "0:01:13", "remaining_time": "0:41:09", "throughput": 7314.45, "total_tokens": 537600} +{"current_steps": 1100, "total_steps": 37885, "loss": 0.043, "lr": 5.801002903140671e-07, "epoch": 0.14517619110465885, "percentage": 2.9, "elapsed_time": "0:01:13", "remaining_time": "0:41:08", "throughput": 7315.74, "total_tokens": 540096} +{"current_steps": 1105, "total_steps": 37885, "loss": 0.0119, "lr": 5.827395091053047e-07, "epoch": 0.1458360828824073, "percentage": 2.92, "elapsed_time": "0:01:14", "remaining_time": "0:41:08", "throughput": 7317.07, "total_tokens": 542592} +{"current_steps": 1110, "total_steps": 37885, "loss": 0.0392, "lr": 5.853787278965426e-07, "epoch": 0.14649597466015574, "percentage": 2.93, "elapsed_time": "0:01:14", "remaining_time": "0:41:07", "throughput": 7316.63, "total_tokens": 544960} +{"current_steps": 1115, "total_steps": 37885, "loss": 0.0851, "lr": 5.880179466877804e-07, "epoch": 0.14715586643790418, "percentage": 2.94, "elapsed_time": "0:01:14", "remaining_time": "0:41:06", "throughput": 7313.9, "total_tokens": 547136} +{"current_steps": 1120, "total_steps": 37885, "loss": 0.1365, "lr": 5.906571654790183e-07, "epoch": 0.14781575821565263, "percentage": 2.96, "elapsed_time": "0:01:15", "remaining_time": "0:41:06", "throughput": 7312.64, "total_tokens": 549440} +{"current_steps": 1125, "total_steps": 37885, "loss": 0.2707, "lr": 5.932963842702559e-07, "epoch": 0.14847564999340107, "percentage": 2.97, "elapsed_time": "0:01:15", "remaining_time": "0:41:05", "throughput": 7315.4, "total_tokens": 552064} +{"current_steps": 1130, "total_steps": 37885, "loss": 0.0997, "lr": 5.959356030614938e-07, "epoch": 0.14913554177114954, "percentage": 2.98, "elapsed_time": "0:01:15", "remaining_time": "0:41:05", "throughput": 7315.01, "total_tokens": 554432} +{"current_steps": 1135, "total_steps": 37885, "loss": 0.0028, "lr": 5.985748218527316e-07, "epoch": 0.149795433548898, "percentage": 3.0, "elapsed_time": "0:01:16", "remaining_time": "0:41:04", "throughput": 7314.92, "total_tokens": 556800} +{"current_steps": 1140, "total_steps": 37885, "loss": 0.2855, "lr": 6.012140406439695e-07, "epoch": 0.15045532532664643, "percentage": 3.01, "elapsed_time": "0:01:16", "remaining_time": "0:41:04", "throughput": 7315.83, "total_tokens": 559296} +{"current_steps": 1145, "total_steps": 37885, "loss": 0.1093, "lr": 6.038532594352071e-07, "epoch": 0.15111521710439488, "percentage": 3.02, "elapsed_time": "0:01:16", "remaining_time": "0:41:03", "throughput": 7320.67, "total_tokens": 562112} +{"current_steps": 1150, "total_steps": 37885, "loss": 0.0592, "lr": 6.064924782264449e-07, "epoch": 0.15177510888214332, "percentage": 3.04, "elapsed_time": "0:01:17", "remaining_time": "0:41:03", "throughput": 7317.65, "total_tokens": 564288} +{"current_steps": 1155, "total_steps": 37885, "loss": 0.188, "lr": 6.091316970176828e-07, "epoch": 0.15243500065989177, "percentage": 3.05, "elapsed_time": "0:01:17", "remaining_time": "0:41:02", "throughput": 7319.59, "total_tokens": 566848} +{"current_steps": 1160, "total_steps": 37885, "loss": 0.4772, "lr": 6.117709158089205e-07, "epoch": 0.15309489243764024, "percentage": 3.06, "elapsed_time": "0:01:17", "remaining_time": "0:41:02", "throughput": 7318.35, "total_tokens": 569152} +{"current_steps": 1165, "total_steps": 37885, "loss": 0.0848, "lr": 6.144101346001583e-07, "epoch": 0.15375478421538868, "percentage": 3.08, "elapsed_time": "0:01:18", "remaining_time": "0:41:01", "throughput": 7317.39, "total_tokens": 571456} +{"current_steps": 1170, "total_steps": 37885, "loss": 0.2235, "lr": 6.170493533913961e-07, "epoch": 0.15441467599313713, "percentage": 3.09, "elapsed_time": "0:01:18", "remaining_time": "0:41:00", "throughput": 7317.3, "total_tokens": 573824} +{"current_steps": 1175, "total_steps": 37885, "loss": 0.0517, "lr": 6.196885721826339e-07, "epoch": 0.15507456777088557, "percentage": 3.1, "elapsed_time": "0:01:18", "remaining_time": "0:41:00", "throughput": 7316.06, "total_tokens": 576128} +{"current_steps": 1180, "total_steps": 37885, "loss": 0.1252, "lr": 6.223277909738716e-07, "epoch": 0.15573445954863402, "percentage": 3.11, "elapsed_time": "0:01:19", "remaining_time": "0:40:59", "throughput": 7315.14, "total_tokens": 578432} +{"current_steps": 1185, "total_steps": 37885, "loss": 0.2063, "lr": 6.249670097651095e-07, "epoch": 0.15639435132638246, "percentage": 3.13, "elapsed_time": "0:01:19", "remaining_time": "0:40:59", "throughput": 7314.05, "total_tokens": 580736} +{"current_steps": 1190, "total_steps": 37885, "loss": 0.0011, "lr": 6.276062285563473e-07, "epoch": 0.15705424310413094, "percentage": 3.14, "elapsed_time": "0:01:19", "remaining_time": "0:40:58", "throughput": 7313.24, "total_tokens": 583040} +{"current_steps": 1195, "total_steps": 37885, "loss": 0.0065, "lr": 6.302454473475851e-07, "epoch": 0.15771413488187938, "percentage": 3.15, "elapsed_time": "0:01:20", "remaining_time": "0:40:57", "throughput": 7312.08, "total_tokens": 585344} +{"current_steps": 1200, "total_steps": 37885, "loss": 0.2037, "lr": 6.328846661388228e-07, "epoch": 0.15837402665962783, "percentage": 3.17, "elapsed_time": "0:01:20", "remaining_time": "0:40:57", "throughput": 7313.91, "total_tokens": 587904} +{"current_steps": 1205, "total_steps": 37885, "loss": 0.2007, "lr": 6.355238849300607e-07, "epoch": 0.15903391843737627, "percentage": 3.18, "elapsed_time": "0:01:20", "remaining_time": "0:40:56", "throughput": 7312.86, "total_tokens": 590208} +{"current_steps": 1210, "total_steps": 37885, "loss": 0.1552, "lr": 6.381631037212984e-07, "epoch": 0.15969381021512472, "percentage": 3.19, "elapsed_time": "0:01:21", "remaining_time": "0:40:56", "throughput": 7311.99, "total_tokens": 592512} +{"current_steps": 1215, "total_steps": 37885, "loss": 0.2204, "lr": 6.408023225125363e-07, "epoch": 0.16035370199287316, "percentage": 3.21, "elapsed_time": "0:01:21", "remaining_time": "0:40:55", "throughput": 7313.63, "total_tokens": 595072} +{"current_steps": 1220, "total_steps": 37885, "loss": 0.2003, "lr": 6.43441541303774e-07, "epoch": 0.1610135937706216, "percentage": 3.22, "elapsed_time": "0:01:21", "remaining_time": "0:40:55", "throughput": 7315.11, "total_tokens": 597632} +{"current_steps": 1225, "total_steps": 37885, "loss": 0.0941, "lr": 6.460807600950119e-07, "epoch": 0.16167348554837008, "percentage": 3.23, "elapsed_time": "0:01:22", "remaining_time": "0:40:54", "throughput": 7316.95, "total_tokens": 600192} +{"current_steps": 1230, "total_steps": 37885, "loss": 0.2021, "lr": 6.487199788862496e-07, "epoch": 0.16233337732611852, "percentage": 3.25, "elapsed_time": "0:01:22", "remaining_time": "0:40:54", "throughput": 7313.77, "total_tokens": 602304} +{"current_steps": 1235, "total_steps": 37885, "loss": 0.2325, "lr": 6.513591976774875e-07, "epoch": 0.16299326910386697, "percentage": 3.26, "elapsed_time": "0:01:22", "remaining_time": "0:40:53", "throughput": 7312.87, "total_tokens": 604608} +{"current_steps": 1240, "total_steps": 37885, "loss": 0.1003, "lr": 6.539984164687252e-07, "epoch": 0.1636531608816154, "percentage": 3.27, "elapsed_time": "0:01:23", "remaining_time": "0:40:53", "throughput": 7312.24, "total_tokens": 606976} +{"current_steps": 1245, "total_steps": 37885, "loss": 0.0153, "lr": 6.566376352599631e-07, "epoch": 0.16431305265936386, "percentage": 3.29, "elapsed_time": "0:01:23", "remaining_time": "0:40:52", "throughput": 7314.72, "total_tokens": 609600} +{"current_steps": 1250, "total_steps": 37885, "loss": 0.2758, "lr": 6.592768540512008e-07, "epoch": 0.1649729444371123, "percentage": 3.3, "elapsed_time": "0:01:23", "remaining_time": "0:40:51", "throughput": 7313.14, "total_tokens": 611840} +{"current_steps": 1255, "total_steps": 37885, "loss": 0.1107, "lr": 6.619160728424386e-07, "epoch": 0.16563283621486077, "percentage": 3.31, "elapsed_time": "0:01:23", "remaining_time": "0:40:51", "throughput": 7316.33, "total_tokens": 614528} +{"current_steps": 1260, "total_steps": 37885, "loss": 0.0118, "lr": 6.645552916336764e-07, "epoch": 0.16629272799260922, "percentage": 3.33, "elapsed_time": "0:01:24", "remaining_time": "0:40:51", "throughput": 7317.53, "total_tokens": 617024} +{"current_steps": 1265, "total_steps": 37885, "loss": 0.156, "lr": 6.671945104249141e-07, "epoch": 0.16695261977035766, "percentage": 3.34, "elapsed_time": "0:01:24", "remaining_time": "0:40:50", "throughput": 7317.29, "total_tokens": 619392} +{"current_steps": 1270, "total_steps": 37885, "loss": 0.179, "lr": 6.69833729216152e-07, "epoch": 0.1676125115481061, "percentage": 3.35, "elapsed_time": "0:01:24", "remaining_time": "0:40:49", "throughput": 7318.25, "total_tokens": 621888} +{"current_steps": 1275, "total_steps": 37885, "loss": 0.1028, "lr": 6.724729480073898e-07, "epoch": 0.16827240332585455, "percentage": 3.37, "elapsed_time": "0:01:25", "remaining_time": "0:40:49", "throughput": 7317.34, "total_tokens": 624192} +{"current_steps": 1280, "total_steps": 37885, "loss": 0.0813, "lr": 6.751121667986275e-07, "epoch": 0.168932295103603, "percentage": 3.38, "elapsed_time": "0:01:25", "remaining_time": "0:40:48", "throughput": 7317.49, "total_tokens": 626624} +{"current_steps": 1285, "total_steps": 37885, "loss": 0.0655, "lr": 6.777513855898653e-07, "epoch": 0.16959218688135147, "percentage": 3.39, "elapsed_time": "0:01:25", "remaining_time": "0:40:48", "throughput": 7316.37, "total_tokens": 628928} +{"current_steps": 1290, "total_steps": 37885, "loss": 0.1142, "lr": 6.803906043811032e-07, "epoch": 0.17025207865909991, "percentage": 3.41, "elapsed_time": "0:01:26", "remaining_time": "0:40:47", "throughput": 7319.43, "total_tokens": 631616} +{"current_steps": 1295, "total_steps": 37885, "loss": 0.045, "lr": 6.83029823172341e-07, "epoch": 0.17091197043684836, "percentage": 3.42, "elapsed_time": "0:01:26", "remaining_time": "0:40:47", "throughput": 7320.27, "total_tokens": 634112} +{"current_steps": 1300, "total_steps": 37885, "loss": 0.2547, "lr": 6.856690419635787e-07, "epoch": 0.1715718622145968, "percentage": 3.43, "elapsed_time": "0:01:26", "remaining_time": "0:40:46", "throughput": 7319.28, "total_tokens": 636416} +{"current_steps": 1305, "total_steps": 37885, "loss": 0.0028, "lr": 6.883082607548165e-07, "epoch": 0.17223175399234525, "percentage": 3.44, "elapsed_time": "0:01:27", "remaining_time": "0:40:46", "throughput": 7319.92, "total_tokens": 638848} +{"current_steps": 1310, "total_steps": 37885, "loss": 0.0734, "lr": 6.909474795460544e-07, "epoch": 0.1728916457700937, "percentage": 3.46, "elapsed_time": "0:01:27", "remaining_time": "0:40:45", "throughput": 7319.8, "total_tokens": 641216} +{"current_steps": 1315, "total_steps": 37885, "loss": 0.0042, "lr": 6.935866983372921e-07, "epoch": 0.17355153754784217, "percentage": 3.47, "elapsed_time": "0:01:27", "remaining_time": "0:40:45", "throughput": 7324.02, "total_tokens": 644032} +{"current_steps": 1320, "total_steps": 37885, "loss": 0.1632, "lr": 6.962259171285299e-07, "epoch": 0.1742114293255906, "percentage": 3.48, "elapsed_time": "0:01:28", "remaining_time": "0:40:45", "throughput": 7324.86, "total_tokens": 646528} +{"current_steps": 1325, "total_steps": 37885, "loss": 0.1357, "lr": 6.988651359197677e-07, "epoch": 0.17487132110333906, "percentage": 3.5, "elapsed_time": "0:01:28", "remaining_time": "0:40:44", "throughput": 7326.48, "total_tokens": 649088} +{"current_steps": 1330, "total_steps": 37885, "loss": 0.0358, "lr": 7.015043547110056e-07, "epoch": 0.1755312128810875, "percentage": 3.51, "elapsed_time": "0:01:28", "remaining_time": "0:40:44", "throughput": 7326.83, "total_tokens": 651520} +{"current_steps": 1335, "total_steps": 37885, "loss": 0.1195, "lr": 7.041435735022433e-07, "epoch": 0.17619110465883595, "percentage": 3.52, "elapsed_time": "0:01:29", "remaining_time": "0:40:43", "throughput": 7327.85, "total_tokens": 654016} +{"current_steps": 1340, "total_steps": 37885, "loss": 0.0002, "lr": 7.067827922934811e-07, "epoch": 0.1768509964365844, "percentage": 3.54, "elapsed_time": "0:01:29", "remaining_time": "0:40:42", "throughput": 7326.97, "total_tokens": 656320} +{"current_steps": 1345, "total_steps": 37885, "loss": 0.2259, "lr": 7.094220110847189e-07, "epoch": 0.17751088821433286, "percentage": 3.55, "elapsed_time": "0:01:29", "remaining_time": "0:40:42", "throughput": 7328.71, "total_tokens": 658880} +{"current_steps": 1350, "total_steps": 37885, "loss": 0.0003, "lr": 7.120612298759568e-07, "epoch": 0.1781707799920813, "percentage": 3.56, "elapsed_time": "0:01:30", "remaining_time": "0:40:41", "throughput": 7327.64, "total_tokens": 661184} +{"current_steps": 1355, "total_steps": 37885, "loss": 0.0801, "lr": 7.147004486671945e-07, "epoch": 0.17883067176982975, "percentage": 3.58, "elapsed_time": "0:01:30", "remaining_time": "0:40:41", "throughput": 7328.54, "total_tokens": 663680} +{"current_steps": 1360, "total_steps": 37885, "loss": 0.0336, "lr": 7.173396674584322e-07, "epoch": 0.1794905635475782, "percentage": 3.59, "elapsed_time": "0:01:30", "remaining_time": "0:40:40", "throughput": 7328.3, "total_tokens": 666048} +{"current_steps": 1365, "total_steps": 37885, "loss": 0.1074, "lr": 7.199788862496701e-07, "epoch": 0.18015045532532664, "percentage": 3.6, "elapsed_time": "0:01:31", "remaining_time": "0:40:40", "throughput": 7328.58, "total_tokens": 668480} +{"current_steps": 1370, "total_steps": 37885, "loss": 0.2056, "lr": 7.226181050409078e-07, "epoch": 0.1808103471030751, "percentage": 3.62, "elapsed_time": "0:01:31", "remaining_time": "0:40:40", "throughput": 7334.39, "total_tokens": 671488} +{"current_steps": 1375, "total_steps": 37885, "loss": 0.1112, "lr": 7.252573238321457e-07, "epoch": 0.18147023888082353, "percentage": 3.63, "elapsed_time": "0:01:31", "remaining_time": "0:40:39", "throughput": 7334.92, "total_tokens": 673920} +{"current_steps": 1380, "total_steps": 37885, "loss": 0.2556, "lr": 7.278965426233834e-07, "epoch": 0.182130130658572, "percentage": 3.64, "elapsed_time": "0:01:32", "remaining_time": "0:40:39", "throughput": 7335.8, "total_tokens": 676416} +{"current_steps": 1385, "total_steps": 37885, "loss": 0.1058, "lr": 7.305357614146212e-07, "epoch": 0.18279002243632045, "percentage": 3.66, "elapsed_time": "0:01:32", "remaining_time": "0:40:38", "throughput": 7337.96, "total_tokens": 679040} +{"current_steps": 1390, "total_steps": 37885, "loss": 0.188, "lr": 7.33174980205859e-07, "epoch": 0.1834499142140689, "percentage": 3.67, "elapsed_time": "0:01:32", "remaining_time": "0:40:38", "throughput": 7342.18, "total_tokens": 681920} +{"current_steps": 1395, "total_steps": 37885, "loss": 0.0583, "lr": 7.358141989970969e-07, "epoch": 0.18410980599181734, "percentage": 3.68, "elapsed_time": "0:01:33", "remaining_time": "0:40:38", "throughput": 7342.91, "total_tokens": 684416} +{"current_steps": 1400, "total_steps": 37885, "loss": 0.0136, "lr": 7.384534177883346e-07, "epoch": 0.18476969776956578, "percentage": 3.7, "elapsed_time": "0:01:33", "remaining_time": "0:40:37", "throughput": 7344.02, "total_tokens": 686976} +{"current_steps": 1405, "total_steps": 37885, "loss": 0.1873, "lr": 7.410926365795724e-07, "epoch": 0.18542958954731423, "percentage": 3.71, "elapsed_time": "0:01:33", "remaining_time": "0:40:37", "throughput": 7343.92, "total_tokens": 689408} +{"current_steps": 1410, "total_steps": 37885, "loss": 0.0987, "lr": 7.437318553708102e-07, "epoch": 0.1860894813250627, "percentage": 3.72, "elapsed_time": "0:01:34", "remaining_time": "0:40:37", "throughput": 7342.46, "total_tokens": 691712} +{"current_steps": 1415, "total_steps": 37885, "loss": 0.1017, "lr": 7.463710741620481e-07, "epoch": 0.18674937310281114, "percentage": 3.73, "elapsed_time": "0:01:34", "remaining_time": "0:40:36", "throughput": 7341.59, "total_tokens": 694080} +{"current_steps": 1420, "total_steps": 37885, "loss": 0.1519, "lr": 7.490102929532857e-07, "epoch": 0.1874092648805596, "percentage": 3.75, "elapsed_time": "0:01:34", "remaining_time": "0:40:36", "throughput": 7342.37, "total_tokens": 696640} +{"current_steps": 1425, "total_steps": 37885, "loss": 0.0091, "lr": 7.516495117445236e-07, "epoch": 0.18806915665830803, "percentage": 3.76, "elapsed_time": "0:01:35", "remaining_time": "0:40:36", "throughput": 7342.54, "total_tokens": 699136} +{"current_steps": 1430, "total_steps": 37885, "loss": 0.3917, "lr": 7.542887305357614e-07, "epoch": 0.18872904843605648, "percentage": 3.77, "elapsed_time": "0:01:35", "remaining_time": "0:40:35", "throughput": 7343.38, "total_tokens": 701696} +{"current_steps": 1435, "total_steps": 37885, "loss": 0.206, "lr": 7.569279493269993e-07, "epoch": 0.18938894021380492, "percentage": 3.79, "elapsed_time": "0:01:35", "remaining_time": "0:40:35", "throughput": 7345.34, "total_tokens": 704384} +{"current_steps": 1440, "total_steps": 37885, "loss": 0.0719, "lr": 7.595671681182369e-07, "epoch": 0.1900488319915534, "percentage": 3.8, "elapsed_time": "0:01:36", "remaining_time": "0:40:35", "throughput": 7342.64, "total_tokens": 706560} +{"current_steps": 1445, "total_steps": 37885, "loss": 0.0804, "lr": 7.622063869094748e-07, "epoch": 0.19070872376930184, "percentage": 3.81, "elapsed_time": "0:01:36", "remaining_time": "0:40:35", "throughput": 7344.74, "total_tokens": 709248} +{"current_steps": 1450, "total_steps": 37885, "loss": 0.1375, "lr": 7.648456057007126e-07, "epoch": 0.19136861554705029, "percentage": 3.83, "elapsed_time": "0:01:36", "remaining_time": "0:40:34", "throughput": 7340.48, "total_tokens": 711296} +{"current_steps": 1455, "total_steps": 37885, "loss": 0.0051, "lr": 7.674848244919505e-07, "epoch": 0.19202850732479873, "percentage": 3.84, "elapsed_time": "0:01:37", "remaining_time": "0:40:34", "throughput": 7340.22, "total_tokens": 713728} +{"current_steps": 1460, "total_steps": 37885, "loss": 0.0025, "lr": 7.701240432831881e-07, "epoch": 0.19268839910254718, "percentage": 3.85, "elapsed_time": "0:01:37", "remaining_time": "0:40:34", "throughput": 7340.8, "total_tokens": 716224} +{"current_steps": 1465, "total_steps": 37885, "loss": 0.3884, "lr": 7.727632620744259e-07, "epoch": 0.19334829088029562, "percentage": 3.87, "elapsed_time": "0:01:37", "remaining_time": "0:40:33", "throughput": 7339.29, "total_tokens": 718528} +{"current_steps": 1470, "total_steps": 37885, "loss": 0.2433, "lr": 7.754024808656638e-07, "epoch": 0.1940081826580441, "percentage": 3.88, "elapsed_time": "0:01:38", "remaining_time": "0:40:33", "throughput": 7338.84, "total_tokens": 720960} +{"current_steps": 1475, "total_steps": 37885, "loss": 0.196, "lr": 7.780416996569014e-07, "epoch": 0.19466807443579254, "percentage": 3.89, "elapsed_time": "0:01:38", "remaining_time": "0:40:33", "throughput": 7337.1, "total_tokens": 723200} +{"current_steps": 1480, "total_steps": 37885, "loss": 0.1681, "lr": 7.806809184481393e-07, "epoch": 0.19532796621354098, "percentage": 3.91, "elapsed_time": "0:01:38", "remaining_time": "0:40:32", "throughput": 7339.69, "total_tokens": 725888} +{"current_steps": 1485, "total_steps": 37885, "loss": 0.0609, "lr": 7.833201372393771e-07, "epoch": 0.19598785799128943, "percentage": 3.92, "elapsed_time": "0:01:39", "remaining_time": "0:40:32", "throughput": 7339.34, "total_tokens": 728256} +{"current_steps": 1490, "total_steps": 37885, "loss": 0.1787, "lr": 7.859593560306149e-07, "epoch": 0.19664774976903787, "percentage": 3.93, "elapsed_time": "0:01:39", "remaining_time": "0:40:31", "throughput": 7339.8, "total_tokens": 730688} +{"current_steps": 1495, "total_steps": 37885, "loss": 0.2156, "lr": 7.885985748218526e-07, "epoch": 0.19730764154678632, "percentage": 3.95, "elapsed_time": "0:01:39", "remaining_time": "0:40:31", "throughput": 7338.74, "total_tokens": 732992} +{"current_steps": 1500, "total_steps": 37885, "loss": 0.0585, "lr": 7.912377936130905e-07, "epoch": 0.1979675333245348, "percentage": 3.96, "elapsed_time": "0:01:40", "remaining_time": "0:40:30", "throughput": 7338.95, "total_tokens": 735424} +{"current_steps": 1505, "total_steps": 37885, "loss": 0.1832, "lr": 7.938770124043283e-07, "epoch": 0.19862742510228323, "percentage": 3.97, "elapsed_time": "0:01:40", "remaining_time": "0:40:30", "throughput": 7337.52, "total_tokens": 737664} +{"current_steps": 1510, "total_steps": 37885, "loss": 0.2169, "lr": 7.965162311955661e-07, "epoch": 0.19928731688003168, "percentage": 3.99, "elapsed_time": "0:01:40", "remaining_time": "0:40:29", "throughput": 7337.77, "total_tokens": 740096} +{"current_steps": 1515, "total_steps": 37885, "loss": 0.1121, "lr": 7.991554499868038e-07, "epoch": 0.19994720865778012, "percentage": 4.0, "elapsed_time": "0:01:41", "remaining_time": "0:40:29", "throughput": 7341.24, "total_tokens": 742912} +{"current_steps": 1520, "total_steps": 37885, "loss": 0.1211, "lr": 8.017946687780417e-07, "epoch": 0.20060710043552857, "percentage": 4.01, "elapsed_time": "0:01:41", "remaining_time": "0:40:28", "throughput": 7343.16, "total_tokens": 745536} +{"current_steps": 1525, "total_steps": 37885, "loss": 0.0182, "lr": 8.044338875692794e-07, "epoch": 0.201266992213277, "percentage": 4.03, "elapsed_time": "0:01:41", "remaining_time": "0:40:28", "throughput": 7343.41, "total_tokens": 747968} +{"current_steps": 1530, "total_steps": 37885, "loss": 0.1159, "lr": 8.070731063605173e-07, "epoch": 0.20192688399102549, "percentage": 4.04, "elapsed_time": "0:01:42", "remaining_time": "0:40:28", "throughput": 7344.09, "total_tokens": 750464} +{"current_steps": 1535, "total_steps": 37885, "loss": 0.1005, "lr": 8.09712325151755e-07, "epoch": 0.20258677576877393, "percentage": 4.05, "elapsed_time": "0:01:42", "remaining_time": "0:40:27", "throughput": 7344.41, "total_tokens": 752896} +{"current_steps": 1540, "total_steps": 37885, "loss": 0.2272, "lr": 8.123515439429929e-07, "epoch": 0.20324666754652237, "percentage": 4.06, "elapsed_time": "0:01:42", "remaining_time": "0:40:27", "throughput": 7347.39, "total_tokens": 755648} +{"current_steps": 1545, "total_steps": 37885, "loss": 0.2213, "lr": 8.149907627342306e-07, "epoch": 0.20390655932427082, "percentage": 4.08, "elapsed_time": "0:01:43", "remaining_time": "0:40:26", "throughput": 7348.3, "total_tokens": 758144} +{"current_steps": 1550, "total_steps": 37885, "loss": 0.0083, "lr": 8.176299815254685e-07, "epoch": 0.20456645110201926, "percentage": 4.09, "elapsed_time": "0:01:43", "remaining_time": "0:40:26", "throughput": 7349.8, "total_tokens": 760704} +{"current_steps": 1555, "total_steps": 37885, "loss": 0.0023, "lr": 8.202692003167062e-07, "epoch": 0.2052263428797677, "percentage": 4.1, "elapsed_time": "0:01:43", "remaining_time": "0:40:25", "throughput": 7350.04, "total_tokens": 763136} +{"current_steps": 1560, "total_steps": 37885, "loss": 0.1036, "lr": 8.229084191079441e-07, "epoch": 0.20588623465751615, "percentage": 4.12, "elapsed_time": "0:01:44", "remaining_time": "0:40:25", "throughput": 7353.97, "total_tokens": 766016} +{"current_steps": 1565, "total_steps": 37885, "loss": 0.3823, "lr": 8.255476378991818e-07, "epoch": 0.20654612643526463, "percentage": 4.13, "elapsed_time": "0:01:44", "remaining_time": "0:40:25", "throughput": 7354.63, "total_tokens": 768512} +{"current_steps": 1570, "total_steps": 37885, "loss": 0.2189, "lr": 8.281868566904196e-07, "epoch": 0.20720601821301307, "percentage": 4.14, "elapsed_time": "0:01:44", "remaining_time": "0:40:24", "throughput": 7353.85, "total_tokens": 770816} +{"current_steps": 1575, "total_steps": 37885, "loss": 0.2545, "lr": 8.308260754816574e-07, "epoch": 0.20786590999076152, "percentage": 4.16, "elapsed_time": "0:01:45", "remaining_time": "0:40:24", "throughput": 7354.38, "total_tokens": 773312} +{"current_steps": 1580, "total_steps": 37885, "loss": 0.043, "lr": 8.334652942728951e-07, "epoch": 0.20852580176850996, "percentage": 4.17, "elapsed_time": "0:01:45", "remaining_time": "0:40:23", "throughput": 7354.11, "total_tokens": 775680} +{"current_steps": 1585, "total_steps": 37885, "loss": 0.1056, "lr": 8.36104513064133e-07, "epoch": 0.2091856935462584, "percentage": 4.18, "elapsed_time": "0:01:45", "remaining_time": "0:40:23", "throughput": 7353.73, "total_tokens": 778048} +{"current_steps": 1590, "total_steps": 37885, "loss": 0.2807, "lr": 8.387437318553708e-07, "epoch": 0.20984558532400685, "percentage": 4.2, "elapsed_time": "0:01:46", "remaining_time": "0:40:22", "throughput": 7355.52, "total_tokens": 780672} +{"current_steps": 1595, "total_steps": 37885, "loss": 0.4118, "lr": 8.413829506466085e-07, "epoch": 0.21050547710175532, "percentage": 4.21, "elapsed_time": "0:01:46", "remaining_time": "0:40:22", "throughput": 7357.73, "total_tokens": 783360} +{"current_steps": 1600, "total_steps": 37885, "loss": 0.1426, "lr": 8.440221694378463e-07, "epoch": 0.21116536887950377, "percentage": 4.22, "elapsed_time": "0:01:46", "remaining_time": "0:40:21", "throughput": 7358.33, "total_tokens": 785856} +{"current_steps": 1605, "total_steps": 37885, "loss": 0.1497, "lr": 8.466613882290842e-07, "epoch": 0.2118252606572522, "percentage": 4.24, "elapsed_time": "0:01:47", "remaining_time": "0:40:21", "throughput": 7360.16, "total_tokens": 788480} +{"current_steps": 1610, "total_steps": 37885, "loss": 0.1096, "lr": 8.49300607020322e-07, "epoch": 0.21248515243500066, "percentage": 4.25, "elapsed_time": "0:01:47", "remaining_time": "0:40:21", "throughput": 7361.35, "total_tokens": 791040} +{"current_steps": 1615, "total_steps": 37885, "loss": 0.0508, "lr": 8.519398258115597e-07, "epoch": 0.2131450442127491, "percentage": 4.26, "elapsed_time": "0:01:47", "remaining_time": "0:40:20", "throughput": 7362.54, "total_tokens": 793600} +{"current_steps": 1620, "total_steps": 37885, "loss": 0.2171, "lr": 8.545790446027975e-07, "epoch": 0.21380493599049755, "percentage": 4.28, "elapsed_time": "0:01:48", "remaining_time": "0:40:20", "throughput": 7362.36, "total_tokens": 795968} +{"current_steps": 1625, "total_steps": 37885, "loss": 0.0251, "lr": 8.572182633940354e-07, "epoch": 0.21446482776824602, "percentage": 4.29, "elapsed_time": "0:01:48", "remaining_time": "0:40:19", "throughput": 7361.51, "total_tokens": 798272} +{"current_steps": 1630, "total_steps": 37885, "loss": 0.1544, "lr": 8.59857482185273e-07, "epoch": 0.21512471954599446, "percentage": 4.3, "elapsed_time": "0:01:48", "remaining_time": "0:40:19", "throughput": 7364.2, "total_tokens": 801024} +{"current_steps": 1635, "total_steps": 37885, "loss": 0.1345, "lr": 8.624967009765109e-07, "epoch": 0.2157846113237429, "percentage": 4.32, "elapsed_time": "0:01:49", "remaining_time": "0:40:18", "throughput": 7365.38, "total_tokens": 803584} +{"current_steps": 1640, "total_steps": 37885, "loss": 0.0544, "lr": 8.651359197677487e-07, "epoch": 0.21644450310149135, "percentage": 4.33, "elapsed_time": "0:01:49", "remaining_time": "0:40:18", "throughput": 7365.02, "total_tokens": 805952} +{"current_steps": 1645, "total_steps": 37885, "loss": 0.0881, "lr": 8.677751385589866e-07, "epoch": 0.2171043948792398, "percentage": 4.34, "elapsed_time": "0:01:49", "remaining_time": "0:40:18", "throughput": 7366.17, "total_tokens": 808512} +{"current_steps": 1650, "total_steps": 37885, "loss": 0.0636, "lr": 8.704143573502242e-07, "epoch": 0.21776428665698824, "percentage": 4.36, "elapsed_time": "0:01:50", "remaining_time": "0:40:17", "throughput": 7367.82, "total_tokens": 811136} +{"current_steps": 1655, "total_steps": 37885, "loss": 0.0526, "lr": 8.730535761414621e-07, "epoch": 0.21842417843473672, "percentage": 4.37, "elapsed_time": "0:01:50", "remaining_time": "0:40:17", "throughput": 7366.26, "total_tokens": 813376} +{"current_steps": 1660, "total_steps": 37885, "loss": 0.2304, "lr": 8.756927949326999e-07, "epoch": 0.21908407021248516, "percentage": 4.38, "elapsed_time": "0:01:50", "remaining_time": "0:40:16", "throughput": 7366.75, "total_tokens": 815872} +{"current_steps": 1665, "total_steps": 37885, "loss": 0.0414, "lr": 8.783320137239377e-07, "epoch": 0.2197439619902336, "percentage": 4.39, "elapsed_time": "0:01:51", "remaining_time": "0:40:16", "throughput": 7366.27, "total_tokens": 818240} +{"current_steps": 1670, "total_steps": 37885, "loss": 0.108, "lr": 8.809712325151754e-07, "epoch": 0.22040385376798205, "percentage": 4.41, "elapsed_time": "0:01:51", "remaining_time": "0:40:15", "throughput": 7366.96, "total_tokens": 820736} +{"current_steps": 1675, "total_steps": 37885, "loss": 0.3837, "lr": 8.836104513064132e-07, "epoch": 0.2210637455457305, "percentage": 4.42, "elapsed_time": "0:01:51", "remaining_time": "0:40:15", "throughput": 7370.53, "total_tokens": 823616} +{"current_steps": 1680, "total_steps": 37885, "loss": 0.1827, "lr": 8.862496700976511e-07, "epoch": 0.22172363732347894, "percentage": 4.43, "elapsed_time": "0:01:52", "remaining_time": "0:40:15", "throughput": 7371.07, "total_tokens": 826112} +{"current_steps": 1685, "total_steps": 37885, "loss": 0.1026, "lr": 8.888888888888888e-07, "epoch": 0.2223835291012274, "percentage": 4.45, "elapsed_time": "0:01:52", "remaining_time": "0:40:14", "throughput": 7372.2, "total_tokens": 828672} +{"current_steps": 1690, "total_steps": 37885, "loss": 0.1787, "lr": 8.915281076801266e-07, "epoch": 0.22304342087897586, "percentage": 4.46, "elapsed_time": "0:01:52", "remaining_time": "0:40:14", "throughput": 7374.02, "total_tokens": 831296} +{"current_steps": 1695, "total_steps": 37885, "loss": 0.0912, "lr": 8.941673264713644e-07, "epoch": 0.2237033126567243, "percentage": 4.47, "elapsed_time": "0:01:53", "remaining_time": "0:40:14", "throughput": 7375.11, "total_tokens": 833856} +{"current_steps": 1700, "total_steps": 37885, "loss": 0.2237, "lr": 8.968065452626022e-07, "epoch": 0.22436320443447275, "percentage": 4.49, "elapsed_time": "0:01:53", "remaining_time": "0:40:13", "throughput": 7376.72, "total_tokens": 836480} +{"current_steps": 1705, "total_steps": 37885, "loss": 0.0627, "lr": 8.9944576405384e-07, "epoch": 0.2250230962122212, "percentage": 4.5, "elapsed_time": "0:01:53", "remaining_time": "0:40:13", "throughput": 7377.07, "total_tokens": 838976} +{"current_steps": 1710, "total_steps": 37885, "loss": 0.1252, "lr": 9.020849828450778e-07, "epoch": 0.22568298798996964, "percentage": 4.51, "elapsed_time": "0:01:54", "remaining_time": "0:40:12", "throughput": 7379.66, "total_tokens": 841728} +{"current_steps": 1715, "total_steps": 37885, "loss": 0.0499, "lr": 9.047242016363156e-07, "epoch": 0.22634287976771808, "percentage": 4.53, "elapsed_time": "0:01:54", "remaining_time": "0:40:12", "throughput": 7378.26, "total_tokens": 843968} +{"current_steps": 1720, "total_steps": 37885, "loss": 0.4636, "lr": 9.073634204275534e-07, "epoch": 0.22700277154546655, "percentage": 4.54, "elapsed_time": "0:01:54", "remaining_time": "0:40:11", "throughput": 7378.95, "total_tokens": 846464} +{"current_steps": 1725, "total_steps": 37885, "loss": 0.2002, "lr": 9.100026392187912e-07, "epoch": 0.227662663323215, "percentage": 4.55, "elapsed_time": "0:01:55", "remaining_time": "0:40:11", "throughput": 7380.57, "total_tokens": 849088} +{"current_steps": 1730, "total_steps": 37885, "loss": 0.0052, "lr": 9.12641858010029e-07, "epoch": 0.22832255510096344, "percentage": 4.57, "elapsed_time": "0:01:55", "remaining_time": "0:40:11", "throughput": 7382.2, "total_tokens": 851712} +{"current_steps": 1735, "total_steps": 37885, "loss": 0.156, "lr": 9.152810768012667e-07, "epoch": 0.2289824468787119, "percentage": 4.58, "elapsed_time": "0:01:55", "remaining_time": "0:40:10", "throughput": 7382.69, "total_tokens": 854208} +{"current_steps": 1740, "total_steps": 37885, "loss": 0.0557, "lr": 9.179202955925046e-07, "epoch": 0.22964233865646033, "percentage": 4.59, "elapsed_time": "0:01:56", "remaining_time": "0:40:10", "throughput": 7382.05, "total_tokens": 856576} +{"current_steps": 1745, "total_steps": 37885, "loss": 0.2487, "lr": 9.205595143837424e-07, "epoch": 0.23030223043420878, "percentage": 4.61, "elapsed_time": "0:01:56", "remaining_time": "0:40:09", "throughput": 7382.31, "total_tokens": 859008} +{"current_steps": 1750, "total_steps": 37885, "loss": 0.1679, "lr": 9.231987331749802e-07, "epoch": 0.23096212221195725, "percentage": 4.62, "elapsed_time": "0:01:56", "remaining_time": "0:40:09", "throughput": 7382.45, "total_tokens": 861440} +{"current_steps": 1755, "total_steps": 37885, "loss": 0.1502, "lr": 9.258379519662179e-07, "epoch": 0.2316220139897057, "percentage": 4.63, "elapsed_time": "0:01:57", "remaining_time": "0:40:09", "throughput": 7382.9, "total_tokens": 863936} +{"current_steps": 1760, "total_steps": 37885, "loss": 0.1127, "lr": 9.284771707574558e-07, "epoch": 0.23228190576745414, "percentage": 4.65, "elapsed_time": "0:01:57", "remaining_time": "0:40:08", "throughput": 7381.5, "total_tokens": 866176} +{"current_steps": 1765, "total_steps": 37885, "loss": 0.1574, "lr": 9.311163895486936e-07, "epoch": 0.23294179754520258, "percentage": 4.66, "elapsed_time": "0:01:57", "remaining_time": "0:40:08", "throughput": 7380.78, "total_tokens": 868480} +{"current_steps": 1770, "total_steps": 37885, "loss": 0.1025, "lr": 9.337556083399313e-07, "epoch": 0.23360168932295103, "percentage": 4.67, "elapsed_time": "0:01:57", "remaining_time": "0:40:07", "throughput": 7381.26, "total_tokens": 870976} +{"current_steps": 1775, "total_steps": 37885, "loss": 0.2237, "lr": 9.363948271311691e-07, "epoch": 0.23426158110069947, "percentage": 4.69, "elapsed_time": "0:01:58", "remaining_time": "0:40:07", "throughput": 7378.9, "total_tokens": 873088} +{"current_steps": 1780, "total_steps": 37885, "loss": 0.2408, "lr": 9.390340459224069e-07, "epoch": 0.23492147287844795, "percentage": 4.7, "elapsed_time": "0:01:58", "remaining_time": "0:40:06", "throughput": 7378.94, "total_tokens": 875520} +{"current_steps": 1785, "total_steps": 37885, "loss": 0.0166, "lr": 9.416732647136448e-07, "epoch": 0.2355813646561964, "percentage": 4.71, "elapsed_time": "0:01:58", "remaining_time": "0:40:06", "throughput": 7376.61, "total_tokens": 877632} +{"current_steps": 1790, "total_steps": 37885, "loss": 0.0702, "lr": 9.443124835048824e-07, "epoch": 0.23624125643394484, "percentage": 4.72, "elapsed_time": "0:01:59", "remaining_time": "0:40:05", "throughput": 7376.23, "total_tokens": 880000} +{"current_steps": 1795, "total_steps": 37885, "loss": 0.0669, "lr": 9.469517022961203e-07, "epoch": 0.23690114821169328, "percentage": 4.74, "elapsed_time": "0:01:59", "remaining_time": "0:40:05", "throughput": 7374.39, "total_tokens": 882176} +{"current_steps": 1800, "total_steps": 37885, "loss": 0.0867, "lr": 9.495909210873581e-07, "epoch": 0.23756103998944172, "percentage": 4.75, "elapsed_time": "0:01:59", "remaining_time": "0:40:04", "throughput": 7376.03, "total_tokens": 884800} +{"current_steps": 1805, "total_steps": 37885, "loss": 0.0113, "lr": 9.522301398785959e-07, "epoch": 0.23822093176719017, "percentage": 4.76, "elapsed_time": "0:02:00", "remaining_time": "0:40:04", "throughput": 7375.09, "total_tokens": 887104} +{"current_steps": 1810, "total_steps": 37885, "loss": 0.043, "lr": 9.548693586698336e-07, "epoch": 0.23888082354493864, "percentage": 4.78, "elapsed_time": "0:02:00", "remaining_time": "0:40:04", "throughput": 7373.84, "total_tokens": 889408} +{"current_steps": 1815, "total_steps": 37885, "loss": 0.2031, "lr": 9.575085774610714e-07, "epoch": 0.2395407153226871, "percentage": 4.79, "elapsed_time": "0:02:00", "remaining_time": "0:40:03", "throughput": 7372.42, "total_tokens": 891648} +{"current_steps": 1820, "total_steps": 37885, "loss": 0.2171, "lr": 9.601477962523092e-07, "epoch": 0.24020060710043553, "percentage": 4.8, "elapsed_time": "0:02:01", "remaining_time": "0:40:03", "throughput": 7373.36, "total_tokens": 894208} +{"current_steps": 1825, "total_steps": 37885, "loss": 0.1157, "lr": 9.627870150435472e-07, "epoch": 0.24086049887818398, "percentage": 4.82, "elapsed_time": "0:02:01", "remaining_time": "0:40:02", "throughput": 7374.06, "total_tokens": 896704} +{"current_steps": 1830, "total_steps": 37885, "loss": 0.045, "lr": 9.65426233834785e-07, "epoch": 0.24152039065593242, "percentage": 4.83, "elapsed_time": "0:02:01", "remaining_time": "0:40:02", "throughput": 7375.09, "total_tokens": 899264} +{"current_steps": 1835, "total_steps": 37885, "loss": 0.0719, "lr": 9.680654526260227e-07, "epoch": 0.24218028243368087, "percentage": 4.84, "elapsed_time": "0:02:02", "remaining_time": "0:40:01", "throughput": 7375.55, "total_tokens": 901760} +{"current_steps": 1840, "total_steps": 37885, "loss": 0.1597, "lr": 9.707046714172605e-07, "epoch": 0.24284017421142934, "percentage": 4.86, "elapsed_time": "0:02:02", "remaining_time": "0:40:01", "throughput": 7373.34, "total_tokens": 903872} +{"current_steps": 1845, "total_steps": 37885, "loss": 0.0005, "lr": 9.733438902084983e-07, "epoch": 0.24350006598917778, "percentage": 4.87, "elapsed_time": "0:02:02", "remaining_time": "0:40:01", "throughput": 7373.83, "total_tokens": 906368} +{"current_steps": 1850, "total_steps": 37885, "loss": 0.0012, "lr": 9.75983108999736e-07, "epoch": 0.24415995776692623, "percentage": 4.88, "elapsed_time": "0:02:03", "remaining_time": "0:40:00", "throughput": 7374.51, "total_tokens": 908864} +{"current_steps": 1855, "total_steps": 37885, "loss": 0.169, "lr": 9.786223277909738e-07, "epoch": 0.24481984954467467, "percentage": 4.9, "elapsed_time": "0:02:03", "remaining_time": "0:40:00", "throughput": 7372.81, "total_tokens": 911040} +{"current_steps": 1860, "total_steps": 37885, "loss": 0.2041, "lr": 9.812615465822116e-07, "epoch": 0.24547974132242312, "percentage": 4.91, "elapsed_time": "0:02:03", "remaining_time": "0:39:59", "throughput": 7372.44, "total_tokens": 913408} +{"current_steps": 1865, "total_steps": 37885, "loss": 0.4034, "lr": 9.839007653734496e-07, "epoch": 0.24613963310017156, "percentage": 4.92, "elapsed_time": "0:02:04", "remaining_time": "0:39:59", "throughput": 7373.28, "total_tokens": 915968} +{"current_steps": 1870, "total_steps": 37885, "loss": 0.1269, "lr": 9.865399841646871e-07, "epoch": 0.24679952487792003, "percentage": 4.94, "elapsed_time": "0:02:04", "remaining_time": "0:39:58", "throughput": 7374.43, "total_tokens": 918528} +{"current_steps": 1875, "total_steps": 37885, "loss": 0.0006, "lr": 9.89179202955925e-07, "epoch": 0.24745941665566848, "percentage": 4.95, "elapsed_time": "0:02:04", "remaining_time": "0:39:58", "throughput": 7375.94, "total_tokens": 921152} +{"current_steps": 1880, "total_steps": 37885, "loss": 0.0005, "lr": 9.918184217471629e-07, "epoch": 0.24811930843341692, "percentage": 4.96, "elapsed_time": "0:02:05", "remaining_time": "0:39:58", "throughput": 7375.54, "total_tokens": 923520} +{"current_steps": 1885, "total_steps": 37885, "loss": 0.1003, "lr": 9.944576405384004e-07, "epoch": 0.24877920021116537, "percentage": 4.98, "elapsed_time": "0:02:05", "remaining_time": "0:39:57", "throughput": 7375.19, "total_tokens": 925888} +{"current_steps": 1890, "total_steps": 37885, "loss": 0.1144, "lr": 9.970968593296384e-07, "epoch": 0.2494390919889138, "percentage": 4.99, "elapsed_time": "0:02:05", "remaining_time": "0:39:57", "throughput": 7377.87, "total_tokens": 928704} +{"current_steps": 1895, "total_steps": 37885, "loss": 0.4074, "lr": 9.997360781208762e-07, "epoch": 0.2500989837666623, "percentage": 5.0, "elapsed_time": "0:02:06", "remaining_time": "0:39:56", "throughput": 7376.53, "total_tokens": 930944} +{"current_steps": 1895, "total_steps": 37885, "eval_loss": 0.15521390736103058, "epoch": 0.2500989837666623, "percentage": 5.0, "elapsed_time": "0:02:14", "remaining_time": "0:42:26", "throughput": 6943.21, "total_tokens": 930944} +{"current_steps": 1900, "total_steps": 37885, "loss": 0.2799, "lr": 1.002375296912114e-06, "epoch": 0.25075887554441073, "percentage": 5.02, "elapsed_time": "0:02:46", "remaining_time": "0:52:26", "throughput": 5617.74, "total_tokens": 933376} +{"current_steps": 1905, "total_steps": 37885, "loss": 0.151, "lr": 1.0050145157033517e-06, "epoch": 0.2514187673221592, "percentage": 5.03, "elapsed_time": "0:02:46", "remaining_time": "0:52:24", "throughput": 5622.32, "total_tokens": 936000} +{"current_steps": 1910, "total_steps": 37885, "loss": 0.226, "lr": 1.0076537344945895e-06, "epoch": 0.2520786590999076, "percentage": 5.04, "elapsed_time": "0:02:46", "remaining_time": "0:52:21", "throughput": 5625.78, "total_tokens": 938432} +{"current_steps": 1915, "total_steps": 37885, "loss": 0.1408, "lr": 1.0102929532858273e-06, "epoch": 0.25273855087765607, "percentage": 5.05, "elapsed_time": "0:02:47", "remaining_time": "0:52:19", "throughput": 5631.61, "total_tokens": 941312} +{"current_steps": 1920, "total_steps": 37885, "loss": 0.0428, "lr": 1.012932172077065e-06, "epoch": 0.2533984426554045, "percentage": 5.07, "elapsed_time": "0:02:47", "remaining_time": "0:52:17", "throughput": 5633.62, "total_tokens": 943488} +{"current_steps": 1925, "total_steps": 37885, "loss": 0.1021, "lr": 1.015571390868303e-06, "epoch": 0.25405833443315295, "percentage": 5.08, "elapsed_time": "0:02:47", "remaining_time": "0:52:14", "throughput": 5636.77, "total_tokens": 945856} +{"current_steps": 1930, "total_steps": 37885, "loss": 0.1363, "lr": 1.0182106096595406e-06, "epoch": 0.2547182262109014, "percentage": 5.09, "elapsed_time": "0:02:48", "remaining_time": "0:52:12", "throughput": 5640.46, "total_tokens": 948352} +{"current_steps": 1935, "total_steps": 37885, "loss": 0.0973, "lr": 1.0208498284507786e-06, "epoch": 0.25537811798864984, "percentage": 5.11, "elapsed_time": "0:02:48", "remaining_time": "0:52:09", "throughput": 5644.95, "total_tokens": 950976} +{"current_steps": 1940, "total_steps": 37885, "loss": 0.1607, "lr": 1.0234890472420164e-06, "epoch": 0.2560380097663983, "percentage": 5.12, "elapsed_time": "0:02:48", "remaining_time": "0:52:07", "throughput": 5647.18, "total_tokens": 953216} +{"current_steps": 1945, "total_steps": 37885, "loss": 0.015, "lr": 1.0261282660332541e-06, "epoch": 0.25669790154414673, "percentage": 5.13, "elapsed_time": "0:02:49", "remaining_time": "0:52:05", "throughput": 5650.55, "total_tokens": 955648} +{"current_steps": 1950, "total_steps": 37885, "loss": 0.0037, "lr": 1.028767484824492e-06, "epoch": 0.25735779332189523, "percentage": 5.15, "elapsed_time": "0:02:49", "remaining_time": "0:52:02", "throughput": 5652.84, "total_tokens": 957888} +{"current_steps": 1955, "total_steps": 37885, "loss": 0.1865, "lr": 1.0314067036157297e-06, "epoch": 0.2580176850996437, "percentage": 5.16, "elapsed_time": "0:02:49", "remaining_time": "0:52:00", "throughput": 5655.07, "total_tokens": 960128} +{"current_steps": 1960, "total_steps": 37885, "loss": 0.0348, "lr": 1.0340459224069675e-06, "epoch": 0.2586775768773921, "percentage": 5.17, "elapsed_time": "0:02:50", "remaining_time": "0:51:57", "throughput": 5658.05, "total_tokens": 962496} +{"current_steps": 1965, "total_steps": 37885, "loss": 0.3992, "lr": 1.0366851411982054e-06, "epoch": 0.25933746865514057, "percentage": 5.19, "elapsed_time": "0:02:50", "remaining_time": "0:51:55", "throughput": 5662.43, "total_tokens": 965120} +{"current_steps": 1970, "total_steps": 37885, "loss": 0.007, "lr": 1.039324359989443e-06, "epoch": 0.259997360432889, "percentage": 5.2, "elapsed_time": "0:02:50", "remaining_time": "0:51:53", "throughput": 5665.97, "total_tokens": 967616} +{"current_steps": 1975, "total_steps": 37885, "loss": 0.1056, "lr": 1.0419635787806808e-06, "epoch": 0.26065725221063746, "percentage": 5.21, "elapsed_time": "0:02:51", "remaining_time": "0:51:51", "throughput": 5670.26, "total_tokens": 970240} +{"current_steps": 1980, "total_steps": 37885, "loss": 0.2734, "lr": 1.0446027975719188e-06, "epoch": 0.2613171439883859, "percentage": 5.23, "elapsed_time": "0:02:51", "remaining_time": "0:51:48", "throughput": 5672.76, "total_tokens": 972544} +{"current_steps": 1985, "total_steps": 37885, "loss": 0.2381, "lr": 1.0472420163631565e-06, "epoch": 0.26197703576613435, "percentage": 5.24, "elapsed_time": "0:02:51", "remaining_time": "0:51:46", "throughput": 5675.73, "total_tokens": 974912} +{"current_steps": 1990, "total_steps": 37885, "loss": 0.4659, "lr": 1.049881235154394e-06, "epoch": 0.2626369275438828, "percentage": 5.25, "elapsed_time": "0:02:52", "remaining_time": "0:51:44", "throughput": 5677.57, "total_tokens": 977088} +{"current_steps": 1995, "total_steps": 37885, "loss": 0.241, "lr": 1.052520453945632e-06, "epoch": 0.26329681932163124, "percentage": 5.27, "elapsed_time": "0:02:52", "remaining_time": "0:51:41", "throughput": 5681.53, "total_tokens": 979648} +{"current_steps": 2000, "total_steps": 37885, "loss": 0.0961, "lr": 1.0551596727368699e-06, "epoch": 0.2639567110993797, "percentage": 5.28, "elapsed_time": "0:02:52", "remaining_time": "0:51:39", "throughput": 5686.03, "total_tokens": 982336} +{"current_steps": 2005, "total_steps": 37885, "loss": 0.0121, "lr": 1.0577988915281074e-06, "epoch": 0.2646166028771281, "percentage": 5.29, "elapsed_time": "0:02:53", "remaining_time": "0:51:37", "throughput": 5689.18, "total_tokens": 984768} +{"current_steps": 2010, "total_steps": 37885, "loss": 0.0467, "lr": 1.0604381103193454e-06, "epoch": 0.2652764946548766, "percentage": 5.31, "elapsed_time": "0:02:53", "remaining_time": "0:51:35", "throughput": 5692.08, "total_tokens": 987136} +{"current_steps": 2015, "total_steps": 37885, "loss": 0.142, "lr": 1.0630773291105832e-06, "epoch": 0.26593638643262507, "percentage": 5.32, "elapsed_time": "0:02:53", "remaining_time": "0:51:33", "throughput": 5696.56, "total_tokens": 989824} +{"current_steps": 2020, "total_steps": 37885, "loss": 0.2781, "lr": 1.0657165479018212e-06, "epoch": 0.2665962782103735, "percentage": 5.33, "elapsed_time": "0:02:54", "remaining_time": "0:51:30", "throughput": 5698.73, "total_tokens": 992064} +{"current_steps": 2025, "total_steps": 37885, "loss": 0.2715, "lr": 1.0683557666930587e-06, "epoch": 0.26725616998812196, "percentage": 5.35, "elapsed_time": "0:02:54", "remaining_time": "0:51:28", "throughput": 5701.16, "total_tokens": 994368} +{"current_steps": 2030, "total_steps": 37885, "loss": 0.1645, "lr": 1.0709949854842965e-06, "epoch": 0.2679160617658704, "percentage": 5.36, "elapsed_time": "0:02:54", "remaining_time": "0:51:26", "throughput": 5704.63, "total_tokens": 996864} +{"current_steps": 2035, "total_steps": 37885, "loss": 0.1714, "lr": 1.0736342042755345e-06, "epoch": 0.26857595354361885, "percentage": 5.37, "elapsed_time": "0:02:55", "remaining_time": "0:51:24", "throughput": 5708.22, "total_tokens": 999360} +{"current_steps": 2040, "total_steps": 37885, "loss": 0.0919, "lr": 1.0762734230667723e-06, "epoch": 0.2692358453213673, "percentage": 5.38, "elapsed_time": "0:02:55", "remaining_time": "0:51:22", "throughput": 5712.06, "total_tokens": 1001920} +{"current_steps": 2045, "total_steps": 37885, "loss": 0.0063, "lr": 1.0789126418580098e-06, "epoch": 0.26989573709911574, "percentage": 5.4, "elapsed_time": "0:02:55", "remaining_time": "0:51:19", "throughput": 5714.47, "total_tokens": 1004224} +{"current_steps": 2050, "total_steps": 37885, "loss": 0.0012, "lr": 1.0815518606492478e-06, "epoch": 0.2705556288768642, "percentage": 5.41, "elapsed_time": "0:02:56", "remaining_time": "0:51:17", "throughput": 5716.8, "total_tokens": 1006528} +{"current_steps": 2055, "total_steps": 37885, "loss": 0.2114, "lr": 1.0841910794404856e-06, "epoch": 0.27121552065461263, "percentage": 5.42, "elapsed_time": "0:02:56", "remaining_time": "0:51:15", "throughput": 5719.57, "total_tokens": 1008896} +{"current_steps": 2060, "total_steps": 37885, "loss": 0.1478, "lr": 1.0868302982317234e-06, "epoch": 0.2718754124323611, "percentage": 5.44, "elapsed_time": "0:02:56", "remaining_time": "0:51:13", "throughput": 5724.28, "total_tokens": 1011648} +{"current_steps": 2065, "total_steps": 37885, "loss": 0.0509, "lr": 1.0894695170229611e-06, "epoch": 0.2725353042101095, "percentage": 5.45, "elapsed_time": "0:02:57", "remaining_time": "0:51:11", "throughput": 5728.02, "total_tokens": 1014208} +{"current_steps": 2070, "total_steps": 37885, "loss": 0.105, "lr": 1.092108735814199e-06, "epoch": 0.27319519598785796, "percentage": 5.46, "elapsed_time": "0:02:57", "remaining_time": "0:51:09", "throughput": 5730.96, "total_tokens": 1016640} +{"current_steps": 2075, "total_steps": 37885, "loss": 0.0663, "lr": 1.0947479546054369e-06, "epoch": 0.27385508776560646, "percentage": 5.48, "elapsed_time": "0:02:57", "remaining_time": "0:51:09", "throughput": 5731.14, "total_tokens": 1019328} +{"current_steps": 2080, "total_steps": 37885, "loss": 0.0865, "lr": 1.0973871733966747e-06, "epoch": 0.2745149795433549, "percentage": 5.49, "elapsed_time": "0:02:58", "remaining_time": "0:51:07", "throughput": 5733.7, "total_tokens": 1021696} +{"current_steps": 2085, "total_steps": 37885, "loss": 0.2014, "lr": 1.1000263921879122e-06, "epoch": 0.27517487132110335, "percentage": 5.5, "elapsed_time": "0:02:58", "remaining_time": "0:51:05", "throughput": 5737.41, "total_tokens": 1024256} +{"current_steps": 2090, "total_steps": 37885, "loss": 0.1935, "lr": 1.1026656109791502e-06, "epoch": 0.2758347630988518, "percentage": 5.52, "elapsed_time": "0:02:58", "remaining_time": "0:51:03", "throughput": 5739.7, "total_tokens": 1026560} +{"current_steps": 2095, "total_steps": 37885, "loss": 0.2346, "lr": 1.105304829770388e-06, "epoch": 0.27649465487660024, "percentage": 5.53, "elapsed_time": "0:02:59", "remaining_time": "0:51:01", "throughput": 5743.56, "total_tokens": 1029184} +{"current_steps": 2100, "total_steps": 37885, "loss": 0.0947, "lr": 1.1079440485616255e-06, "epoch": 0.2771545466543487, "percentage": 5.54, "elapsed_time": "0:02:59", "remaining_time": "0:50:59", "throughput": 5747.28, "total_tokens": 1031744} +{"current_steps": 2105, "total_steps": 37885, "loss": 0.1724, "lr": 1.1105832673528635e-06, "epoch": 0.27781443843209713, "percentage": 5.56, "elapsed_time": "0:02:59", "remaining_time": "0:50:56", "throughput": 5749.71, "total_tokens": 1034048} +{"current_steps": 2110, "total_steps": 37885, "loss": 0.0564, "lr": 1.1132224861441013e-06, "epoch": 0.2784743302098456, "percentage": 5.57, "elapsed_time": "0:03:00", "remaining_time": "0:50:54", "throughput": 5753.09, "total_tokens": 1036544} +{"current_steps": 2115, "total_steps": 37885, "loss": 0.2777, "lr": 1.115861704935339e-06, "epoch": 0.279134221987594, "percentage": 5.58, "elapsed_time": "0:03:00", "remaining_time": "0:50:52", "throughput": 5755.36, "total_tokens": 1038848} +{"current_steps": 2120, "total_steps": 37885, "loss": 0.1166, "lr": 1.1185009237265768e-06, "epoch": 0.27979411376534247, "percentage": 5.6, "elapsed_time": "0:03:00", "remaining_time": "0:50:50", "throughput": 5757.76, "total_tokens": 1041152} +{"current_steps": 2125, "total_steps": 37885, "loss": 0.1476, "lr": 1.1211401425178146e-06, "epoch": 0.2804540055430909, "percentage": 5.61, "elapsed_time": "0:03:01", "remaining_time": "0:50:48", "throughput": 5762.62, "total_tokens": 1043968} +{"current_steps": 2130, "total_steps": 37885, "loss": 0.2938, "lr": 1.1237793613090524e-06, "epoch": 0.28111389732083936, "percentage": 5.62, "elapsed_time": "0:03:01", "remaining_time": "0:50:46", "throughput": 5764.95, "total_tokens": 1046272} +{"current_steps": 2135, "total_steps": 37885, "loss": 0.1254, "lr": 1.1264185801002904e-06, "epoch": 0.28177378909858786, "percentage": 5.64, "elapsed_time": "0:03:01", "remaining_time": "0:50:44", "throughput": 5766.56, "total_tokens": 1048448} +{"current_steps": 2140, "total_steps": 37885, "loss": 0.2135, "lr": 1.129057798891528e-06, "epoch": 0.2824336808763363, "percentage": 5.65, "elapsed_time": "0:03:02", "remaining_time": "0:50:42", "throughput": 5770.44, "total_tokens": 1051072} +{"current_steps": 2145, "total_steps": 37885, "loss": 0.0968, "lr": 1.131697017682766e-06, "epoch": 0.28309357265408475, "percentage": 5.66, "elapsed_time": "0:03:02", "remaining_time": "0:50:40", "throughput": 5772.43, "total_tokens": 1053312} +{"current_steps": 2150, "total_steps": 37885, "loss": 0.1169, "lr": 1.1343362364740037e-06, "epoch": 0.2837534644318332, "percentage": 5.68, "elapsed_time": "0:03:02", "remaining_time": "0:50:38", "throughput": 5775.05, "total_tokens": 1055680} +{"current_steps": 2155, "total_steps": 37885, "loss": 0.2016, "lr": 1.1369754552652415e-06, "epoch": 0.28441335620958164, "percentage": 5.69, "elapsed_time": "0:03:03", "remaining_time": "0:50:36", "throughput": 5777.31, "total_tokens": 1057984} +{"current_steps": 2160, "total_steps": 37885, "loss": 0.1111, "lr": 1.1396146740564792e-06, "epoch": 0.2850732479873301, "percentage": 5.7, "elapsed_time": "0:03:03", "remaining_time": "0:50:34", "throughput": 5781.74, "total_tokens": 1060736} +{"current_steps": 2165, "total_steps": 37885, "loss": 0.1279, "lr": 1.142253892847717e-06, "epoch": 0.2857331397650785, "percentage": 5.71, "elapsed_time": "0:03:03", "remaining_time": "0:50:32", "throughput": 5785.72, "total_tokens": 1063424} +{"current_steps": 2170, "total_steps": 37885, "loss": 0.0038, "lr": 1.1448931116389548e-06, "epoch": 0.28639303154282697, "percentage": 5.73, "elapsed_time": "0:03:04", "remaining_time": "0:50:30", "throughput": 5787.78, "total_tokens": 1065728} +{"current_steps": 2175, "total_steps": 37885, "loss": 0.0007, "lr": 1.1475323304301928e-06, "epoch": 0.2870529233205754, "percentage": 5.74, "elapsed_time": "0:03:04", "remaining_time": "0:50:28", "throughput": 5790.65, "total_tokens": 1068160} +{"current_steps": 2180, "total_steps": 37885, "loss": 0.2872, "lr": 1.1501715492214303e-06, "epoch": 0.28771281509832386, "percentage": 5.75, "elapsed_time": "0:03:04", "remaining_time": "0:50:26", "throughput": 5793.37, "total_tokens": 1070592} +{"current_steps": 2185, "total_steps": 37885, "loss": 0.0978, "lr": 1.1528107680126681e-06, "epoch": 0.2883727068760723, "percentage": 5.77, "elapsed_time": "0:03:05", "remaining_time": "0:50:24", "throughput": 5797.41, "total_tokens": 1073280} +{"current_steps": 2190, "total_steps": 37885, "loss": 0.1237, "lr": 1.155449986803906e-06, "epoch": 0.28903259865382075, "percentage": 5.78, "elapsed_time": "0:03:05", "remaining_time": "0:50:22", "throughput": 5799.62, "total_tokens": 1075584} +{"current_steps": 2195, "total_steps": 37885, "loss": 0.1684, "lr": 1.1580892055951439e-06, "epoch": 0.28969249043156925, "percentage": 5.79, "elapsed_time": "0:03:05", "remaining_time": "0:50:20", "throughput": 5802.35, "total_tokens": 1078016} +{"current_steps": 2200, "total_steps": 37885, "loss": 0.1865, "lr": 1.1607284243863814e-06, "epoch": 0.2903523822093177, "percentage": 5.81, "elapsed_time": "0:03:06", "remaining_time": "0:50:18", "throughput": 5805.41, "total_tokens": 1080512} +{"current_steps": 2205, "total_steps": 37885, "loss": 0.169, "lr": 1.1633676431776194e-06, "epoch": 0.29101227398706614, "percentage": 5.82, "elapsed_time": "0:03:06", "remaining_time": "0:50:17", "throughput": 5807.16, "total_tokens": 1082752} +{"current_steps": 2210, "total_steps": 37885, "loss": 0.3035, "lr": 1.1660068619688572e-06, "epoch": 0.2916721657648146, "percentage": 5.83, "elapsed_time": "0:03:06", "remaining_time": "0:50:15", "throughput": 5809.86, "total_tokens": 1085184} +{"current_steps": 2215, "total_steps": 37885, "loss": 0.212, "lr": 1.1686460807600947e-06, "epoch": 0.29233205754256303, "percentage": 5.85, "elapsed_time": "0:03:07", "remaining_time": "0:50:13", "throughput": 5811.28, "total_tokens": 1087360} +{"current_steps": 2220, "total_steps": 37885, "loss": 0.0484, "lr": 1.1712852995513327e-06, "epoch": 0.2929919493203115, "percentage": 5.86, "elapsed_time": "0:03:07", "remaining_time": "0:50:11", "throughput": 5814.75, "total_tokens": 1089984} +{"current_steps": 2225, "total_steps": 37885, "loss": 0.0607, "lr": 1.1739245183425705e-06, "epoch": 0.2936518410980599, "percentage": 5.87, "elapsed_time": "0:03:07", "remaining_time": "0:50:09", "throughput": 5818.29, "total_tokens": 1092608} +{"current_steps": 2230, "total_steps": 37885, "loss": 0.101, "lr": 1.1765637371338085e-06, "epoch": 0.29431173287580836, "percentage": 5.89, "elapsed_time": "0:03:08", "remaining_time": "0:50:07", "throughput": 5820.87, "total_tokens": 1095040} +{"current_steps": 2235, "total_steps": 37885, "loss": 0.1061, "lr": 1.179202955925046e-06, "epoch": 0.2949716246535568, "percentage": 5.9, "elapsed_time": "0:03:08", "remaining_time": "0:50:06", "throughput": 5824.64, "total_tokens": 1097728} +{"current_steps": 2240, "total_steps": 37885, "loss": 0.2662, "lr": 1.1818421747162838e-06, "epoch": 0.29563151643130525, "percentage": 5.91, "elapsed_time": "0:03:08", "remaining_time": "0:50:04", "throughput": 5826.89, "total_tokens": 1100096} +{"current_steps": 2245, "total_steps": 37885, "loss": 0.1855, "lr": 1.1844813935075218e-06, "epoch": 0.2962914082090537, "percentage": 5.93, "elapsed_time": "0:03:09", "remaining_time": "0:50:02", "throughput": 5828.91, "total_tokens": 1102400} +{"current_steps": 2250, "total_steps": 37885, "loss": 0.0741, "lr": 1.1871206122987596e-06, "epoch": 0.29695129998680214, "percentage": 5.94, "elapsed_time": "0:03:09", "remaining_time": "0:50:00", "throughput": 5832.1, "total_tokens": 1104960} +{"current_steps": 2255, "total_steps": 37885, "loss": 0.2804, "lr": 1.1897598310899971e-06, "epoch": 0.2976111917645506, "percentage": 5.95, "elapsed_time": "0:03:09", "remaining_time": "0:49:58", "throughput": 5835.35, "total_tokens": 1107520} +{"current_steps": 2260, "total_steps": 37885, "loss": 0.2522, "lr": 1.1923990498812351e-06, "epoch": 0.2982710835422991, "percentage": 5.97, "elapsed_time": "0:03:10", "remaining_time": "0:49:57", "throughput": 5837.94, "total_tokens": 1109952} +{"current_steps": 2265, "total_steps": 37885, "loss": 0.0116, "lr": 1.195038268672473e-06, "epoch": 0.29893097532004753, "percentage": 5.98, "elapsed_time": "0:03:10", "remaining_time": "0:49:55", "throughput": 5839.25, "total_tokens": 1112128} +{"current_steps": 2270, "total_steps": 37885, "loss": 0.0716, "lr": 1.1976774874637107e-06, "epoch": 0.299590867097796, "percentage": 5.99, "elapsed_time": "0:03:10", "remaining_time": "0:49:53", "throughput": 5842.39, "total_tokens": 1114688} +{"current_steps": 2275, "total_steps": 37885, "loss": 0.2035, "lr": 1.2003167062549485e-06, "epoch": 0.3002507588755444, "percentage": 6.01, "elapsed_time": "0:03:11", "remaining_time": "0:49:51", "throughput": 5845.39, "total_tokens": 1117248} +{"current_steps": 2280, "total_steps": 37885, "loss": 0.1032, "lr": 1.2029559250461862e-06, "epoch": 0.30091065065329287, "percentage": 6.02, "elapsed_time": "0:03:11", "remaining_time": "0:49:50", "throughput": 5848.38, "total_tokens": 1119808} +{"current_steps": 2285, "total_steps": 37885, "loss": 0.4005, "lr": 1.2055951438374242e-06, "epoch": 0.3015705424310413, "percentage": 6.03, "elapsed_time": "0:03:11", "remaining_time": "0:49:48", "throughput": 5850.26, "total_tokens": 1122112} +{"current_steps": 2290, "total_steps": 37885, "loss": 0.1585, "lr": 1.208234362628662e-06, "epoch": 0.30223043420878976, "percentage": 6.04, "elapsed_time": "0:03:12", "remaining_time": "0:49:46", "throughput": 5852.69, "total_tokens": 1124544} +{"current_steps": 2295, "total_steps": 37885, "loss": 0.1725, "lr": 1.2108735814198995e-06, "epoch": 0.3028903259865382, "percentage": 6.06, "elapsed_time": "0:03:12", "remaining_time": "0:49:44", "throughput": 5855.77, "total_tokens": 1127104} +{"current_steps": 2300, "total_steps": 37885, "loss": 0.1268, "lr": 1.2135128002111375e-06, "epoch": 0.30355021776428665, "percentage": 6.07, "elapsed_time": "0:03:12", "remaining_time": "0:49:43", "throughput": 5858.26, "total_tokens": 1129536} +{"current_steps": 2305, "total_steps": 37885, "loss": 0.0557, "lr": 1.2161520190023753e-06, "epoch": 0.3042101095420351, "percentage": 6.08, "elapsed_time": "0:03:13", "remaining_time": "0:49:41", "throughput": 5860.12, "total_tokens": 1131840} +{"current_steps": 2310, "total_steps": 37885, "loss": 0.128, "lr": 1.2187912377936129e-06, "epoch": 0.30487000131978353, "percentage": 6.1, "elapsed_time": "0:03:13", "remaining_time": "0:49:39", "throughput": 5862.88, "total_tokens": 1134336} +{"current_steps": 2315, "total_steps": 37885, "loss": 0.3577, "lr": 1.2214304565848509e-06, "epoch": 0.305529893097532, "percentage": 6.11, "elapsed_time": "0:03:13", "remaining_time": "0:49:37", "throughput": 5864.69, "total_tokens": 1136640} +{"current_steps": 2320, "total_steps": 37885, "loss": 0.1164, "lr": 1.2240696753760886e-06, "epoch": 0.3061897848752805, "percentage": 6.12, "elapsed_time": "0:03:14", "remaining_time": "0:49:36", "throughput": 5867.6, "total_tokens": 1139136} +{"current_steps": 2325, "total_steps": 37885, "loss": 0.0429, "lr": 1.2267088941673264e-06, "epoch": 0.3068496766530289, "percentage": 6.14, "elapsed_time": "0:03:14", "remaining_time": "0:49:34", "throughput": 5870.17, "total_tokens": 1141568} +{"current_steps": 2330, "total_steps": 37885, "loss": 0.0024, "lr": 1.2293481129585642e-06, "epoch": 0.30750956843077737, "percentage": 6.15, "elapsed_time": "0:03:14", "remaining_time": "0:49:32", "throughput": 5872.38, "total_tokens": 1143936} +{"current_steps": 2335, "total_steps": 37885, "loss": 0.0776, "lr": 1.231987331749802e-06, "epoch": 0.3081694602085258, "percentage": 6.16, "elapsed_time": "0:03:15", "remaining_time": "0:49:30", "throughput": 5876.06, "total_tokens": 1146624} +{"current_steps": 2340, "total_steps": 37885, "loss": 0.0591, "lr": 1.2346265505410397e-06, "epoch": 0.30882935198627426, "percentage": 6.18, "elapsed_time": "0:03:15", "remaining_time": "0:49:29", "throughput": 5878.97, "total_tokens": 1149120} +{"current_steps": 2345, "total_steps": 37885, "loss": 0.1412, "lr": 1.2372657693322777e-06, "epoch": 0.3094892437640227, "percentage": 6.19, "elapsed_time": "0:03:15", "remaining_time": "0:49:27", "throughput": 5881.07, "total_tokens": 1151488} +{"current_steps": 2350, "total_steps": 37885, "loss": 0.0975, "lr": 1.2399049881235153e-06, "epoch": 0.31014913554177115, "percentage": 6.2, "elapsed_time": "0:03:16", "remaining_time": "0:49:25", "throughput": 5881.9, "total_tokens": 1153600} +{"current_steps": 2355, "total_steps": 37885, "loss": 0.1037, "lr": 1.2425442069147532e-06, "epoch": 0.3108090273195196, "percentage": 6.22, "elapsed_time": "0:03:16", "remaining_time": "0:49:24", "throughput": 5885.1, "total_tokens": 1156224} +{"current_steps": 2360, "total_steps": 37885, "loss": 0.1141, "lr": 1.245183425705991e-06, "epoch": 0.31146891909726804, "percentage": 6.23, "elapsed_time": "0:03:16", "remaining_time": "0:49:22", "throughput": 5887.43, "total_tokens": 1158656} +{"current_steps": 2365, "total_steps": 37885, "loss": 0.1503, "lr": 1.2478226444972288e-06, "epoch": 0.3121288108750165, "percentage": 6.24, "elapsed_time": "0:03:17", "remaining_time": "0:49:20", "throughput": 5891.31, "total_tokens": 1161408} +{"current_steps": 2370, "total_steps": 37885, "loss": 0.0871, "lr": 1.2504618632884666e-06, "epoch": 0.3127887026527649, "percentage": 6.26, "elapsed_time": "0:03:17", "remaining_time": "0:49:19", "throughput": 5894.0, "total_tokens": 1163904} +{"current_steps": 2375, "total_steps": 37885, "loss": 0.3747, "lr": 1.2531010820797043e-06, "epoch": 0.31344859443051337, "percentage": 6.27, "elapsed_time": "0:03:17", "remaining_time": "0:49:17", "throughput": 5895.66, "total_tokens": 1166208} +{"current_steps": 2380, "total_steps": 37885, "loss": 0.1877, "lr": 1.2557403008709421e-06, "epoch": 0.3141084862082619, "percentage": 6.28, "elapsed_time": "0:03:18", "remaining_time": "0:49:15", "throughput": 5897.34, "total_tokens": 1168512} +{"current_steps": 2385, "total_steps": 37885, "loss": 0.2214, "lr": 1.25837951966218e-06, "epoch": 0.3147683779860103, "percentage": 6.3, "elapsed_time": "0:03:18", "remaining_time": "0:49:14", "throughput": 5900.26, "total_tokens": 1171072} +{"current_steps": 2390, "total_steps": 37885, "loss": 0.0104, "lr": 1.2610187384534177e-06, "epoch": 0.31542826976375876, "percentage": 6.31, "elapsed_time": "0:03:18", "remaining_time": "0:49:12", "throughput": 5901.68, "total_tokens": 1173312} +{"current_steps": 2395, "total_steps": 37885, "loss": 0.1493, "lr": 1.2636579572446554e-06, "epoch": 0.3160881615415072, "percentage": 6.32, "elapsed_time": "0:03:19", "remaining_time": "0:49:11", "throughput": 5903.9, "total_tokens": 1175744} +{"current_steps": 2400, "total_steps": 37885, "loss": 0.2014, "lr": 1.2662971760358934e-06, "epoch": 0.31674805331925565, "percentage": 6.33, "elapsed_time": "0:03:19", "remaining_time": "0:49:09", "throughput": 5905.91, "total_tokens": 1178112} +{"current_steps": 2405, "total_steps": 37885, "loss": 0.0012, "lr": 1.2689363948271312e-06, "epoch": 0.3174079450970041, "percentage": 6.35, "elapsed_time": "0:03:19", "remaining_time": "0:49:07", "throughput": 5907.28, "total_tokens": 1180352} +{"current_steps": 2410, "total_steps": 37885, "loss": 0.1906, "lr": 1.2715756136183688e-06, "epoch": 0.31806783687475254, "percentage": 6.36, "elapsed_time": "0:03:20", "remaining_time": "0:49:06", "throughput": 5908.3, "total_tokens": 1182528} +{"current_steps": 2415, "total_steps": 37885, "loss": 0.018, "lr": 1.2742148324096067e-06, "epoch": 0.318727728652501, "percentage": 6.37, "elapsed_time": "0:03:20", "remaining_time": "0:49:04", "throughput": 5912.0, "total_tokens": 1185280} +{"current_steps": 2420, "total_steps": 37885, "loss": 0.0975, "lr": 1.2768540512008445e-06, "epoch": 0.31938762043024943, "percentage": 6.39, "elapsed_time": "0:03:20", "remaining_time": "0:49:03", "throughput": 5915.65, "total_tokens": 1188032} +{"current_steps": 2425, "total_steps": 37885, "loss": 0.2283, "lr": 1.279493269992082e-06, "epoch": 0.3200475122079979, "percentage": 6.4, "elapsed_time": "0:03:21", "remaining_time": "0:49:01", "throughput": 5917.71, "total_tokens": 1190464} +{"current_steps": 2430, "total_steps": 37885, "loss": 0.192, "lr": 1.28213248878332e-06, "epoch": 0.3207074039857463, "percentage": 6.41, "elapsed_time": "0:03:21", "remaining_time": "0:49:00", "throughput": 5920.2, "total_tokens": 1192960} +{"current_steps": 2435, "total_steps": 37885, "loss": 0.0019, "lr": 1.2847717075745578e-06, "epoch": 0.32136729576349476, "percentage": 6.43, "elapsed_time": "0:03:21", "remaining_time": "0:48:58", "throughput": 5920.81, "total_tokens": 1195072} +{"current_steps": 2440, "total_steps": 37885, "loss": 0.0131, "lr": 1.2874109263657958e-06, "epoch": 0.3220271875412432, "percentage": 6.44, "elapsed_time": "0:03:22", "remaining_time": "0:48:56", "throughput": 5922.43, "total_tokens": 1197376} +{"current_steps": 2445, "total_steps": 37885, "loss": 0.0088, "lr": 1.2900501451570334e-06, "epoch": 0.3226870793189917, "percentage": 6.45, "elapsed_time": "0:03:22", "remaining_time": "0:48:55", "throughput": 5925.17, "total_tokens": 1199936} +{"current_steps": 2450, "total_steps": 37885, "loss": 0.0404, "lr": 1.2926893639482712e-06, "epoch": 0.32334697109674015, "percentage": 6.47, "elapsed_time": "0:03:22", "remaining_time": "0:48:53", "throughput": 5927.26, "total_tokens": 1202368} +{"current_steps": 2455, "total_steps": 37885, "loss": 0.0721, "lr": 1.2953285827395091e-06, "epoch": 0.3240068628744886, "percentage": 6.48, "elapsed_time": "0:03:23", "remaining_time": "0:48:52", "throughput": 5929.46, "total_tokens": 1204800} +{"current_steps": 2460, "total_steps": 37885, "loss": 0.1065, "lr": 1.297967801530747e-06, "epoch": 0.32466675465223704, "percentage": 6.49, "elapsed_time": "0:03:23", "remaining_time": "0:48:50", "throughput": 5933.1, "total_tokens": 1207552} +{"current_steps": 2465, "total_steps": 37885, "loss": 0.2085, "lr": 1.3006070203219845e-06, "epoch": 0.3253266464299855, "percentage": 6.51, "elapsed_time": "0:03:23", "remaining_time": "0:48:49", "throughput": 5934.6, "total_tokens": 1209856} +{"current_steps": 2470, "total_steps": 37885, "loss": 0.1298, "lr": 1.3032462391132225e-06, "epoch": 0.32598653820773393, "percentage": 6.52, "elapsed_time": "0:03:24", "remaining_time": "0:48:47", "throughput": 5936.98, "total_tokens": 1212352} +{"current_steps": 2475, "total_steps": 37885, "loss": 0.0053, "lr": 1.3058854579044602e-06, "epoch": 0.3266464299854824, "percentage": 6.53, "elapsed_time": "0:03:24", "remaining_time": "0:48:46", "throughput": 5939.79, "total_tokens": 1214912} +{"current_steps": 2480, "total_steps": 37885, "loss": 0.0804, "lr": 1.308524676695698e-06, "epoch": 0.3273063217632308, "percentage": 6.55, "elapsed_time": "0:03:24", "remaining_time": "0:48:44", "throughput": 5940.78, "total_tokens": 1217088} +{"current_steps": 2485, "total_steps": 37885, "loss": 0.1441, "lr": 1.3111638954869358e-06, "epoch": 0.32796621354097927, "percentage": 6.56, "elapsed_time": "0:03:25", "remaining_time": "0:48:43", "throughput": 5943.4, "total_tokens": 1219584} +{"current_steps": 2490, "total_steps": 37885, "loss": 0.1647, "lr": 1.3138031142781736e-06, "epoch": 0.3286261053187277, "percentage": 6.57, "elapsed_time": "0:03:25", "remaining_time": "0:48:41", "throughput": 5947.04, "total_tokens": 1222336} +{"current_steps": 2495, "total_steps": 37885, "loss": 0.16, "lr": 1.3164423330694115e-06, "epoch": 0.32928599709647616, "percentage": 6.59, "elapsed_time": "0:03:25", "remaining_time": "0:48:40", "throughput": 5949.52, "total_tokens": 1224832} +{"current_steps": 2500, "total_steps": 37885, "loss": 0.3234, "lr": 1.3190815518606493e-06, "epoch": 0.3299458888742246, "percentage": 6.6, "elapsed_time": "0:03:26", "remaining_time": "0:48:38", "throughput": 5952.33, "total_tokens": 1227392} +{"current_steps": 2505, "total_steps": 37885, "loss": 0.1718, "lr": 1.3217207706518869e-06, "epoch": 0.3306057806519731, "percentage": 6.61, "elapsed_time": "0:03:26", "remaining_time": "0:48:37", "throughput": 5955.07, "total_tokens": 1229952} +{"current_steps": 2510, "total_steps": 37885, "loss": 0.0817, "lr": 1.3243599894431249e-06, "epoch": 0.33126567242972155, "percentage": 6.63, "elapsed_time": "0:03:26", "remaining_time": "0:48:35", "throughput": 5958.34, "total_tokens": 1232640} +{"current_steps": 2515, "total_steps": 37885, "loss": 0.0409, "lr": 1.3269992082343626e-06, "epoch": 0.33192556420747, "percentage": 6.64, "elapsed_time": "0:03:27", "remaining_time": "0:48:34", "throughput": 5961.04, "total_tokens": 1235200} +{"current_steps": 2520, "total_steps": 37885, "loss": 0.0289, "lr": 1.3296384270256002e-06, "epoch": 0.33258545598521844, "percentage": 6.65, "elapsed_time": "0:03:27", "remaining_time": "0:48:32", "throughput": 5963.22, "total_tokens": 1237632} +{"current_steps": 2525, "total_steps": 37885, "loss": 0.1123, "lr": 1.3322776458168382e-06, "epoch": 0.3332453477629669, "percentage": 6.66, "elapsed_time": "0:03:27", "remaining_time": "0:48:31", "throughput": 5964.75, "total_tokens": 1239936} +{"current_steps": 2530, "total_steps": 37885, "loss": 0.2255, "lr": 1.334916864608076e-06, "epoch": 0.3339052395407153, "percentage": 6.68, "elapsed_time": "0:03:28", "remaining_time": "0:48:29", "throughput": 5966.53, "total_tokens": 1242304} +{"current_steps": 2535, "total_steps": 37885, "loss": 0.0004, "lr": 1.3375560833993137e-06, "epoch": 0.33456513131846377, "percentage": 6.69, "elapsed_time": "0:03:28", "remaining_time": "0:48:28", "throughput": 5968.61, "total_tokens": 1244736} +{"current_steps": 2540, "total_steps": 37885, "loss": 0.2463, "lr": 1.3401953021905515e-06, "epoch": 0.3352250230962122, "percentage": 6.7, "elapsed_time": "0:03:28", "remaining_time": "0:48:26", "throughput": 5972.17, "total_tokens": 1247488} +{"current_steps": 2545, "total_steps": 37885, "loss": 0.1187, "lr": 1.3428345209817893e-06, "epoch": 0.33588491487396066, "percentage": 6.72, "elapsed_time": "0:03:29", "remaining_time": "0:48:25", "throughput": 5974.95, "total_tokens": 1250048} +{"current_steps": 2550, "total_steps": 37885, "loss": 0.2226, "lr": 1.345473739773027e-06, "epoch": 0.3365448066517091, "percentage": 6.73, "elapsed_time": "0:03:29", "remaining_time": "0:48:23", "throughput": 5979.03, "total_tokens": 1252928} +{"current_steps": 2555, "total_steps": 37885, "loss": 0.2176, "lr": 1.348112958564265e-06, "epoch": 0.33720469842945755, "percentage": 6.74, "elapsed_time": "0:03:29", "remaining_time": "0:48:22", "throughput": 5981.81, "total_tokens": 1255488} +{"current_steps": 2560, "total_steps": 37885, "loss": 0.4771, "lr": 1.3507521773555026e-06, "epoch": 0.337864590207206, "percentage": 6.76, "elapsed_time": "0:03:30", "remaining_time": "0:48:20", "throughput": 5984.2, "total_tokens": 1257984} +{"current_steps": 2565, "total_steps": 37885, "loss": 0.1681, "lr": 1.3533913961467406e-06, "epoch": 0.3385244819849545, "percentage": 6.77, "elapsed_time": "0:03:30", "remaining_time": "0:48:19", "throughput": 5986.46, "total_tokens": 1260416} +{"current_steps": 2570, "total_steps": 37885, "loss": 0.1162, "lr": 1.3560306149379783e-06, "epoch": 0.33918437376270294, "percentage": 6.78, "elapsed_time": "0:03:30", "remaining_time": "0:48:17", "throughput": 5990.01, "total_tokens": 1263168} +{"current_steps": 2575, "total_steps": 37885, "loss": 0.1092, "lr": 1.3586698337292161e-06, "epoch": 0.3398442655404514, "percentage": 6.8, "elapsed_time": "0:03:31", "remaining_time": "0:48:16", "throughput": 5992.2, "total_tokens": 1265600} +{"current_steps": 2580, "total_steps": 37885, "loss": 0.0017, "lr": 1.361309052520454e-06, "epoch": 0.34050415731819983, "percentage": 6.81, "elapsed_time": "0:03:31", "remaining_time": "0:48:14", "throughput": 5994.44, "total_tokens": 1268032} +{"current_steps": 2585, "total_steps": 37885, "loss": 0.124, "lr": 1.3639482713116917e-06, "epoch": 0.3411640490959483, "percentage": 6.82, "elapsed_time": "0:03:31", "remaining_time": "0:48:13", "throughput": 5996.07, "total_tokens": 1270336} +{"current_steps": 2590, "total_steps": 37885, "loss": 0.0837, "lr": 1.3665874901029294e-06, "epoch": 0.3418239408736967, "percentage": 6.84, "elapsed_time": "0:03:32", "remaining_time": "0:48:11", "throughput": 5999.68, "total_tokens": 1273088} +{"current_steps": 2595, "total_steps": 37885, "loss": 0.2435, "lr": 1.3692267088941674e-06, "epoch": 0.34248383265144516, "percentage": 6.85, "elapsed_time": "0:03:32", "remaining_time": "0:48:10", "throughput": 6001.54, "total_tokens": 1275456} +{"current_steps": 2600, "total_steps": 37885, "loss": 0.0428, "lr": 1.371865927685405e-06, "epoch": 0.3431437244291936, "percentage": 6.86, "elapsed_time": "0:03:32", "remaining_time": "0:48:08", "throughput": 6003.74, "total_tokens": 1277888} +{"current_steps": 2605, "total_steps": 37885, "loss": 0.0011, "lr": 1.3745051464766428e-06, "epoch": 0.34380361620694205, "percentage": 6.88, "elapsed_time": "0:03:33", "remaining_time": "0:48:07", "throughput": 6006.18, "total_tokens": 1280384} +{"current_steps": 2610, "total_steps": 37885, "loss": 0.0331, "lr": 1.3771443652678807e-06, "epoch": 0.3444635079846905, "percentage": 6.89, "elapsed_time": "0:03:33", "remaining_time": "0:48:05", "throughput": 6008.85, "total_tokens": 1282944} +{"current_steps": 2615, "total_steps": 37885, "loss": 0.1367, "lr": 1.3797835840591185e-06, "epoch": 0.34512339976243894, "percentage": 6.9, "elapsed_time": "0:03:33", "remaining_time": "0:48:04", "throughput": 6010.16, "total_tokens": 1285184} +{"current_steps": 2620, "total_steps": 37885, "loss": 0.202, "lr": 1.382422802850356e-06, "epoch": 0.3457832915401874, "percentage": 6.92, "elapsed_time": "0:03:34", "remaining_time": "0:48:02", "throughput": 6013.1, "total_tokens": 1287808} +{"current_steps": 2625, "total_steps": 37885, "loss": 0.1634, "lr": 1.385062021641594e-06, "epoch": 0.34644318331793583, "percentage": 6.93, "elapsed_time": "0:03:34", "remaining_time": "0:48:01", "throughput": 6016.08, "total_tokens": 1290432} +{"current_steps": 2630, "total_steps": 37885, "loss": 0.1988, "lr": 1.3877012404328318e-06, "epoch": 0.34710307509568433, "percentage": 6.94, "elapsed_time": "0:03:34", "remaining_time": "0:47:59", "throughput": 6017.58, "total_tokens": 1292736} +{"current_steps": 2635, "total_steps": 37885, "loss": 0.1405, "lr": 1.3903404592240694e-06, "epoch": 0.3477629668734328, "percentage": 6.96, "elapsed_time": "0:03:35", "remaining_time": "0:47:58", "throughput": 6019.37, "total_tokens": 1295104} +{"current_steps": 2640, "total_steps": 37885, "loss": 0.0939, "lr": 1.3929796780153074e-06, "epoch": 0.3484228586511812, "percentage": 6.97, "elapsed_time": "0:03:35", "remaining_time": "0:47:56", "throughput": 6021.74, "total_tokens": 1297600} +{"current_steps": 2645, "total_steps": 37885, "loss": 0.0013, "lr": 1.3956188968065452e-06, "epoch": 0.34908275042892967, "percentage": 6.98, "elapsed_time": "0:03:35", "remaining_time": "0:47:55", "throughput": 6024.64, "total_tokens": 1300224} +{"current_steps": 2650, "total_steps": 37885, "loss": 0.1809, "lr": 1.3982581155977831e-06, "epoch": 0.3497426422066781, "percentage": 6.99, "elapsed_time": "0:03:36", "remaining_time": "0:47:54", "throughput": 6025.9, "total_tokens": 1302528} +{"current_steps": 2655, "total_steps": 37885, "loss": 0.2074, "lr": 1.4008973343890207e-06, "epoch": 0.35040253398442656, "percentage": 7.01, "elapsed_time": "0:03:36", "remaining_time": "0:47:52", "throughput": 6028.09, "total_tokens": 1305024} +{"current_steps": 2660, "total_steps": 37885, "loss": 0.2161, "lr": 1.4035365531802585e-06, "epoch": 0.351062425762175, "percentage": 7.02, "elapsed_time": "0:03:36", "remaining_time": "0:47:51", "throughput": 6029.83, "total_tokens": 1307392} +{"current_steps": 2665, "total_steps": 37885, "loss": 0.2352, "lr": 1.4061757719714965e-06, "epoch": 0.35172231753992345, "percentage": 7.03, "elapsed_time": "0:03:37", "remaining_time": "0:47:49", "throughput": 6032.68, "total_tokens": 1310016} +{"current_steps": 2670, "total_steps": 37885, "loss": 0.0256, "lr": 1.4088149907627342e-06, "epoch": 0.3523822093176719, "percentage": 7.05, "elapsed_time": "0:03:37", "remaining_time": "0:47:48", "throughput": 6035.2, "total_tokens": 1312576} +{"current_steps": 2675, "total_steps": 37885, "loss": 0.1432, "lr": 1.4114542095539718e-06, "epoch": 0.35304210109542034, "percentage": 7.06, "elapsed_time": "0:03:37", "remaining_time": "0:47:47", "throughput": 6037.4, "total_tokens": 1315072} +{"current_steps": 2680, "total_steps": 37885, "loss": 0.1524, "lr": 1.4140934283452098e-06, "epoch": 0.3537019928731688, "percentage": 7.07, "elapsed_time": "0:03:38", "remaining_time": "0:47:45", "throughput": 6039.39, "total_tokens": 1317504} +{"current_steps": 2685, "total_steps": 37885, "loss": 0.001, "lr": 1.4167326471364476e-06, "epoch": 0.3543618846509172, "percentage": 7.09, "elapsed_time": "0:03:38", "remaining_time": "0:47:44", "throughput": 6040.27, "total_tokens": 1319680} +{"current_steps": 2690, "total_steps": 37885, "loss": 0.0008, "lr": 1.4193718659276853e-06, "epoch": 0.3550217764286657, "percentage": 7.1, "elapsed_time": "0:03:38", "remaining_time": "0:47:42", "throughput": 6042.81, "total_tokens": 1322240} +{"current_steps": 2695, "total_steps": 37885, "loss": 0.1094, "lr": 1.4220110847189231e-06, "epoch": 0.35568166820641417, "percentage": 7.11, "elapsed_time": "0:03:39", "remaining_time": "0:47:41", "throughput": 6045.01, "total_tokens": 1324736} +{"current_steps": 2700, "total_steps": 37885, "loss": 0.1183, "lr": 1.4246503035101609e-06, "epoch": 0.3563415599841626, "percentage": 7.13, "elapsed_time": "0:03:39", "remaining_time": "0:47:40", "throughput": 6046.78, "total_tokens": 1327104} +{"current_steps": 2705, "total_steps": 37885, "loss": 0.2524, "lr": 1.4272895223013989e-06, "epoch": 0.35700145176191106, "percentage": 7.14, "elapsed_time": "0:03:39", "remaining_time": "0:47:38", "throughput": 6050.26, "total_tokens": 1329920} +{"current_steps": 2710, "total_steps": 37885, "loss": 0.1706, "lr": 1.4299287410926366e-06, "epoch": 0.3576613435396595, "percentage": 7.15, "elapsed_time": "0:03:40", "remaining_time": "0:47:37", "throughput": 6052.29, "total_tokens": 1332352} +{"current_steps": 2715, "total_steps": 37885, "loss": 0.0205, "lr": 1.4325679598838742e-06, "epoch": 0.35832123531740795, "percentage": 7.17, "elapsed_time": "0:03:40", "remaining_time": "0:47:35", "throughput": 6055.36, "total_tokens": 1335040} +{"current_steps": 2720, "total_steps": 37885, "loss": 0.003, "lr": 1.4352071786751122e-06, "epoch": 0.3589811270951564, "percentage": 7.18, "elapsed_time": "0:03:40", "remaining_time": "0:47:34", "throughput": 6057.38, "total_tokens": 1337472} +{"current_steps": 2725, "total_steps": 37885, "loss": 0.0809, "lr": 1.43784639746635e-06, "epoch": 0.35964101887290484, "percentage": 7.19, "elapsed_time": "0:03:41", "remaining_time": "0:47:33", "throughput": 6058.58, "total_tokens": 1339712} +{"current_steps": 2730, "total_steps": 37885, "loss": 0.0011, "lr": 1.4404856162575877e-06, "epoch": 0.3603009106506533, "percentage": 7.21, "elapsed_time": "0:03:41", "remaining_time": "0:47:31", "throughput": 6060.6, "total_tokens": 1342144} +{"current_steps": 2735, "total_steps": 37885, "loss": 0.0817, "lr": 1.4431248350488255e-06, "epoch": 0.36096080242840173, "percentage": 7.22, "elapsed_time": "0:03:41", "remaining_time": "0:47:30", "throughput": 6062.42, "total_tokens": 1344512} +{"current_steps": 2740, "total_steps": 37885, "loss": 0.4208, "lr": 1.4457640538400633e-06, "epoch": 0.3616206942061502, "percentage": 7.23, "elapsed_time": "0:03:42", "remaining_time": "0:47:28", "throughput": 6064.15, "total_tokens": 1346880} +{"current_steps": 2745, "total_steps": 37885, "loss": 0.0683, "lr": 1.448403272631301e-06, "epoch": 0.3622805859838986, "percentage": 7.25, "elapsed_time": "0:03:42", "remaining_time": "0:47:27", "throughput": 6065.06, "total_tokens": 1349056} +{"current_steps": 2750, "total_steps": 37885, "loss": 0.1806, "lr": 1.4510424914225388e-06, "epoch": 0.36294047776164706, "percentage": 7.26, "elapsed_time": "0:03:42", "remaining_time": "0:47:26", "throughput": 6066.94, "total_tokens": 1351488} +{"current_steps": 2755, "total_steps": 37885, "loss": 0.0011, "lr": 1.4536817102137766e-06, "epoch": 0.36360036953939556, "percentage": 7.27, "elapsed_time": "0:03:43", "remaining_time": "0:47:24", "throughput": 6068.86, "total_tokens": 1353920} +{"current_steps": 2760, "total_steps": 37885, "loss": 0.3443, "lr": 1.4563209290050144e-06, "epoch": 0.364260261317144, "percentage": 7.29, "elapsed_time": "0:03:43", "remaining_time": "0:47:23", "throughput": 6070.83, "total_tokens": 1356352} +{"current_steps": 2765, "total_steps": 37885, "loss": 0.2605, "lr": 1.4589601477962524e-06, "epoch": 0.36492015309489245, "percentage": 7.3, "elapsed_time": "0:03:43", "remaining_time": "0:47:22", "throughput": 6074.12, "total_tokens": 1359104} +{"current_steps": 2770, "total_steps": 37885, "loss": 0.2077, "lr": 1.46159936658749e-06, "epoch": 0.3655800448726409, "percentage": 7.31, "elapsed_time": "0:03:44", "remaining_time": "0:47:20", "throughput": 6076.03, "total_tokens": 1361536} +{"current_steps": 2775, "total_steps": 37885, "loss": 0.3646, "lr": 1.464238585378728e-06, "epoch": 0.36623993665038934, "percentage": 7.32, "elapsed_time": "0:03:44", "remaining_time": "0:47:19", "throughput": 6078.79, "total_tokens": 1364160} +{"current_steps": 2780, "total_steps": 37885, "loss": 0.0023, "lr": 1.4668778041699657e-06, "epoch": 0.3668998284281378, "percentage": 7.34, "elapsed_time": "0:03:44", "remaining_time": "0:47:17", "throughput": 6081.06, "total_tokens": 1366656} +{"current_steps": 2785, "total_steps": 37885, "loss": 0.142, "lr": 1.4695170229612034e-06, "epoch": 0.36755972020588623, "percentage": 7.35, "elapsed_time": "0:03:45", "remaining_time": "0:47:16", "throughput": 6083.56, "total_tokens": 1369216} +{"current_steps": 2790, "total_steps": 37885, "loss": 0.2354, "lr": 1.4721562417524412e-06, "epoch": 0.3682196119836347, "percentage": 7.36, "elapsed_time": "0:03:45", "remaining_time": "0:47:15", "throughput": 6085.7, "total_tokens": 1371712} +{"current_steps": 2795, "total_steps": 37885, "loss": 0.1816, "lr": 1.474795460543679e-06, "epoch": 0.3688795037613831, "percentage": 7.38, "elapsed_time": "0:03:45", "remaining_time": "0:47:13", "throughput": 6087.39, "total_tokens": 1374080} +{"current_steps": 2800, "total_steps": 37885, "loss": 0.1143, "lr": 1.4774346793349168e-06, "epoch": 0.36953939553913157, "percentage": 7.39, "elapsed_time": "0:03:46", "remaining_time": "0:47:12", "throughput": 6091.29, "total_tokens": 1377024} +{"current_steps": 2805, "total_steps": 37885, "loss": 0.122, "lr": 1.4800738981261548e-06, "epoch": 0.37019928731688, "percentage": 7.4, "elapsed_time": "0:03:46", "remaining_time": "0:47:11", "throughput": 6092.92, "total_tokens": 1379392} +{"current_steps": 2810, "total_steps": 37885, "loss": 0.1612, "lr": 1.4827131169173923e-06, "epoch": 0.37085917909462845, "percentage": 7.42, "elapsed_time": "0:03:46", "remaining_time": "0:47:09", "throughput": 6094.64, "total_tokens": 1381760} +{"current_steps": 2815, "total_steps": 37885, "loss": 0.0652, "lr": 1.48535233570863e-06, "epoch": 0.37151907087237696, "percentage": 7.43, "elapsed_time": "0:03:47", "remaining_time": "0:47:08", "throughput": 6096.26, "total_tokens": 1384128} +{"current_steps": 2820, "total_steps": 37885, "loss": 0.053, "lr": 1.487991554499868e-06, "epoch": 0.3721789626501254, "percentage": 7.44, "elapsed_time": "0:03:47", "remaining_time": "0:47:07", "throughput": 6097.89, "total_tokens": 1386496} +{"current_steps": 2825, "total_steps": 37885, "loss": 0.1133, "lr": 1.4906307732911058e-06, "epoch": 0.37283885442787384, "percentage": 7.46, "elapsed_time": "0:03:47", "remaining_time": "0:47:05", "throughput": 6099.0, "total_tokens": 1388736} +{"current_steps": 2830, "total_steps": 37885, "loss": 0.0446, "lr": 1.4932699920823434e-06, "epoch": 0.3734987462056223, "percentage": 7.47, "elapsed_time": "0:03:48", "remaining_time": "0:47:04", "throughput": 6100.4, "total_tokens": 1391040} +{"current_steps": 2835, "total_steps": 37885, "loss": 0.093, "lr": 1.4959092108735814e-06, "epoch": 0.37415863798337073, "percentage": 7.48, "elapsed_time": "0:03:48", "remaining_time": "0:47:03", "throughput": 6101.82, "total_tokens": 1393344} +{"current_steps": 2840, "total_steps": 37885, "loss": 0.074, "lr": 1.4985484296648192e-06, "epoch": 0.3748185297611192, "percentage": 7.5, "elapsed_time": "0:03:48", "remaining_time": "0:47:01", "throughput": 6103.24, "total_tokens": 1395648} +{"current_steps": 2845, "total_steps": 37885, "loss": 0.2042, "lr": 1.5011876484560567e-06, "epoch": 0.3754784215388676, "percentage": 7.51, "elapsed_time": "0:03:49", "remaining_time": "0:47:00", "throughput": 6105.96, "total_tokens": 1398272} +{"current_steps": 2850, "total_steps": 37885, "loss": 0.2285, "lr": 1.5038268672472947e-06, "epoch": 0.37613831331661607, "percentage": 7.52, "elapsed_time": "0:03:49", "remaining_time": "0:46:59", "throughput": 6107.27, "total_tokens": 1400576} +{"current_steps": 2855, "total_steps": 37885, "loss": 0.049, "lr": 1.5064660860385325e-06, "epoch": 0.3767982050943645, "percentage": 7.54, "elapsed_time": "0:03:49", "remaining_time": "0:46:57", "throughput": 6109.65, "total_tokens": 1403136} +{"current_steps": 2860, "total_steps": 37885, "loss": 0.0759, "lr": 1.5091053048297705e-06, "epoch": 0.37745809687211296, "percentage": 7.55, "elapsed_time": "0:03:49", "remaining_time": "0:46:56", "throughput": 6111.32, "total_tokens": 1405504} +{"current_steps": 2865, "total_steps": 37885, "loss": 0.2051, "lr": 1.511744523621008e-06, "epoch": 0.3781179886498614, "percentage": 7.56, "elapsed_time": "0:03:50", "remaining_time": "0:46:55", "throughput": 6112.64, "total_tokens": 1407808} +{"current_steps": 2870, "total_steps": 37885, "loss": 0.1983, "lr": 1.5143837424122458e-06, "epoch": 0.37877788042760985, "percentage": 7.58, "elapsed_time": "0:03:50", "remaining_time": "0:46:53", "throughput": 6114.71, "total_tokens": 1410304} +{"current_steps": 2875, "total_steps": 37885, "loss": 0.0052, "lr": 1.5170229612034838e-06, "epoch": 0.37943777220535835, "percentage": 7.59, "elapsed_time": "0:03:50", "remaining_time": "0:46:52", "throughput": 6116.58, "total_tokens": 1412736} +{"current_steps": 2880, "total_steps": 37885, "loss": 0.1043, "lr": 1.5196621799947216e-06, "epoch": 0.3800976639831068, "percentage": 7.6, "elapsed_time": "0:03:51", "remaining_time": "0:46:51", "throughput": 6119.66, "total_tokens": 1415488} +{"current_steps": 2885, "total_steps": 37885, "loss": 0.148, "lr": 1.5223013987859591e-06, "epoch": 0.38075755576085524, "percentage": 7.62, "elapsed_time": "0:03:51", "remaining_time": "0:46:50", "throughput": 6121.16, "total_tokens": 1417856} +{"current_steps": 2890, "total_steps": 37885, "loss": 0.1376, "lr": 1.5249406175771971e-06, "epoch": 0.3814174475386037, "percentage": 7.63, "elapsed_time": "0:03:51", "remaining_time": "0:46:48", "throughput": 6122.23, "total_tokens": 1420096} +{"current_steps": 2895, "total_steps": 37885, "loss": 0.1461, "lr": 1.5275798363684349e-06, "epoch": 0.3820773393163521, "percentage": 7.64, "elapsed_time": "0:03:52", "remaining_time": "0:46:47", "throughput": 6124.59, "total_tokens": 1422656} +{"current_steps": 2900, "total_steps": 37885, "loss": 0.1672, "lr": 1.5302190551596727e-06, "epoch": 0.38273723109410057, "percentage": 7.65, "elapsed_time": "0:03:52", "remaining_time": "0:46:46", "throughput": 6126.68, "total_tokens": 1425152} +{"current_steps": 2905, "total_steps": 37885, "loss": 0.0423, "lr": 1.5328582739509104e-06, "epoch": 0.383397122871849, "percentage": 7.67, "elapsed_time": "0:03:52", "remaining_time": "0:46:44", "throughput": 6129.51, "total_tokens": 1427840} +{"current_steps": 2910, "total_steps": 37885, "loss": 0.0218, "lr": 1.5354974927421482e-06, "epoch": 0.38405701464959746, "percentage": 7.68, "elapsed_time": "0:03:53", "remaining_time": "0:46:43", "throughput": 6130.87, "total_tokens": 1430144} +{"current_steps": 2915, "total_steps": 37885, "loss": 0.1745, "lr": 1.5381367115333862e-06, "epoch": 0.3847169064273459, "percentage": 7.69, "elapsed_time": "0:03:53", "remaining_time": "0:46:42", "throughput": 6132.15, "total_tokens": 1432448} +{"current_steps": 2920, "total_steps": 37885, "loss": 0.0026, "lr": 1.540775930324624e-06, "epoch": 0.38537679820509435, "percentage": 7.71, "elapsed_time": "0:03:53", "remaining_time": "0:46:41", "throughput": 6133.45, "total_tokens": 1434752} +{"current_steps": 2925, "total_steps": 37885, "loss": 0.001, "lr": 1.5434151491158615e-06, "epoch": 0.3860366899828428, "percentage": 7.72, "elapsed_time": "0:03:54", "remaining_time": "0:46:39", "throughput": 6135.26, "total_tokens": 1437184} +{"current_steps": 2930, "total_steps": 37885, "loss": 0.051, "lr": 1.5460543679070995e-06, "epoch": 0.38669658176059124, "percentage": 7.73, "elapsed_time": "0:03:54", "remaining_time": "0:46:38", "throughput": 6138.46, "total_tokens": 1440000} +{"current_steps": 2935, "total_steps": 37885, "loss": 0.0864, "lr": 1.5486935866983373e-06, "epoch": 0.3873564735383397, "percentage": 7.75, "elapsed_time": "0:03:54", "remaining_time": "0:46:37", "throughput": 6140.9, "total_tokens": 1442624} +{"current_steps": 2940, "total_steps": 37885, "loss": 0.291, "lr": 1.551332805489575e-06, "epoch": 0.3880163653160882, "percentage": 7.76, "elapsed_time": "0:03:55", "remaining_time": "0:46:36", "throughput": 6143.16, "total_tokens": 1445184} +{"current_steps": 2945, "total_steps": 37885, "loss": 0.2756, "lr": 1.5539720242808128e-06, "epoch": 0.38867625709383663, "percentage": 7.77, "elapsed_time": "0:03:55", "remaining_time": "0:46:34", "throughput": 6144.87, "total_tokens": 1447616} +{"current_steps": 2950, "total_steps": 37885, "loss": 0.0006, "lr": 1.5566112430720506e-06, "epoch": 0.3893361488715851, "percentage": 7.79, "elapsed_time": "0:03:55", "remaining_time": "0:46:33", "throughput": 6147.05, "total_tokens": 1450176} +{"current_steps": 2955, "total_steps": 37885, "loss": 0.4024, "lr": 1.5592504618632884e-06, "epoch": 0.3899960406493335, "percentage": 7.8, "elapsed_time": "0:03:56", "remaining_time": "0:46:32", "throughput": 6149.15, "total_tokens": 1452672} +{"current_steps": 2960, "total_steps": 37885, "loss": 0.2948, "lr": 1.5618896806545262e-06, "epoch": 0.39065593242708196, "percentage": 7.81, "elapsed_time": "0:03:56", "remaining_time": "0:46:31", "throughput": 6151.42, "total_tokens": 1455232} +{"current_steps": 2965, "total_steps": 37885, "loss": 0.2825, "lr": 1.564528899445764e-06, "epoch": 0.3913158242048304, "percentage": 7.83, "elapsed_time": "0:03:56", "remaining_time": "0:46:30", "throughput": 6153.64, "total_tokens": 1457792} +{"current_steps": 2970, "total_steps": 37885, "loss": 0.0889, "lr": 1.5671681182370017e-06, "epoch": 0.39197571598257885, "percentage": 7.84, "elapsed_time": "0:03:57", "remaining_time": "0:46:28", "throughput": 6155.2, "total_tokens": 1460160} +{"current_steps": 2975, "total_steps": 37885, "loss": 0.074, "lr": 1.5698073370282397e-06, "epoch": 0.3926356077603273, "percentage": 7.85, "elapsed_time": "0:03:57", "remaining_time": "0:46:27", "throughput": 6156.26, "total_tokens": 1462400} +{"current_steps": 2980, "total_steps": 37885, "loss": 0.128, "lr": 1.5724465558194772e-06, "epoch": 0.39329549953807574, "percentage": 7.87, "elapsed_time": "0:03:57", "remaining_time": "0:46:26", "throughput": 6158.98, "total_tokens": 1465088} +{"current_steps": 2985, "total_steps": 37885, "loss": 0.1378, "lr": 1.5750857746107152e-06, "epoch": 0.3939553913158242, "percentage": 7.88, "elapsed_time": "0:03:58", "remaining_time": "0:46:25", "throughput": 6160.51, "total_tokens": 1467456} +{"current_steps": 2990, "total_steps": 37885, "loss": 0.0013, "lr": 1.577724993401953e-06, "epoch": 0.39461528309357263, "percentage": 7.89, "elapsed_time": "0:03:58", "remaining_time": "0:46:23", "throughput": 6162.71, "total_tokens": 1470016} +{"current_steps": 2995, "total_steps": 37885, "loss": 0.0779, "lr": 1.5803642121931908e-06, "epoch": 0.3952751748713211, "percentage": 7.91, "elapsed_time": "0:03:58", "remaining_time": "0:46:22", "throughput": 6164.5, "total_tokens": 1472448} +{"current_steps": 3000, "total_steps": 37885, "loss": 0.0693, "lr": 1.5830034309844285e-06, "epoch": 0.3959350666490696, "percentage": 7.92, "elapsed_time": "0:03:59", "remaining_time": "0:46:21", "throughput": 6166.87, "total_tokens": 1475072} +{"current_steps": 3005, "total_steps": 37885, "loss": 0.063, "lr": 1.5856426497756663e-06, "epoch": 0.396594958426818, "percentage": 7.93, "elapsed_time": "0:03:59", "remaining_time": "0:46:20", "throughput": 6168.29, "total_tokens": 1477440} +{"current_steps": 3010, "total_steps": 37885, "loss": 0.0011, "lr": 1.588281868566904e-06, "epoch": 0.39725485020456647, "percentage": 7.95, "elapsed_time": "0:03:59", "remaining_time": "0:46:19", "throughput": 6169.94, "total_tokens": 1479872} +{"current_steps": 3015, "total_steps": 37885, "loss": 0.2275, "lr": 1.590921087358142e-06, "epoch": 0.3979147419823149, "percentage": 7.96, "elapsed_time": "0:04:00", "remaining_time": "0:46:17", "throughput": 6170.13, "total_tokens": 1481920} +{"current_steps": 3020, "total_steps": 37885, "loss": 0.2769, "lr": 1.5935603061493796e-06, "epoch": 0.39857463376006336, "percentage": 7.97, "elapsed_time": "0:04:00", "remaining_time": "0:46:16", "throughput": 6174.19, "total_tokens": 1484992} +{"current_steps": 3025, "total_steps": 37885, "loss": 0.3505, "lr": 1.5961995249406174e-06, "epoch": 0.3992345255378118, "percentage": 7.98, "elapsed_time": "0:04:00", "remaining_time": "0:46:15", "throughput": 6175.18, "total_tokens": 1487232} +{"current_steps": 3030, "total_steps": 37885, "loss": 0.19, "lr": 1.5988387437318554e-06, "epoch": 0.39989441731556025, "percentage": 8.0, "elapsed_time": "0:04:01", "remaining_time": "0:46:14", "throughput": 6177.19, "total_tokens": 1489728} +{"current_steps": 3035, "total_steps": 37885, "loss": 0.2316, "lr": 1.6014779625230932e-06, "epoch": 0.4005543090933087, "percentage": 8.01, "elapsed_time": "0:04:01", "remaining_time": "0:46:12", "throughput": 6178.17, "total_tokens": 1491968} +{"current_steps": 3040, "total_steps": 37885, "loss": 0.1632, "lr": 1.6041171813143307e-06, "epoch": 0.40121420087105714, "percentage": 8.02, "elapsed_time": "0:04:01", "remaining_time": "0:46:11", "throughput": 6179.61, "total_tokens": 1494336} +{"current_steps": 3045, "total_steps": 37885, "loss": 0.1404, "lr": 1.6067564001055687e-06, "epoch": 0.4018740926488056, "percentage": 8.04, "elapsed_time": "0:04:02", "remaining_time": "0:46:10", "throughput": 6183.83, "total_tokens": 1497472} +{"current_steps": 3050, "total_steps": 37885, "loss": 0.085, "lr": 1.6093956188968065e-06, "epoch": 0.402533984426554, "percentage": 8.05, "elapsed_time": "0:04:02", "remaining_time": "0:46:09", "throughput": 6185.77, "total_tokens": 1499968} +{"current_steps": 3055, "total_steps": 37885, "loss": 0.0344, "lr": 1.612034837688044e-06, "epoch": 0.40319387620430247, "percentage": 8.06, "elapsed_time": "0:04:02", "remaining_time": "0:46:08", "throughput": 6187.23, "total_tokens": 1502336} +{"current_steps": 3060, "total_steps": 37885, "loss": 0.2477, "lr": 1.614674056479282e-06, "epoch": 0.40385376798205097, "percentage": 8.08, "elapsed_time": "0:04:03", "remaining_time": "0:46:07", "throughput": 6188.16, "total_tokens": 1504576} +{"current_steps": 3065, "total_steps": 37885, "loss": 0.2589, "lr": 1.6173132752705198e-06, "epoch": 0.4045136597597994, "percentage": 8.09, "elapsed_time": "0:04:03", "remaining_time": "0:46:05", "throughput": 6190.35, "total_tokens": 1507136} +{"current_steps": 3070, "total_steps": 37885, "loss": 0.0691, "lr": 1.6199524940617578e-06, "epoch": 0.40517355153754786, "percentage": 8.1, "elapsed_time": "0:04:03", "remaining_time": "0:46:04", "throughput": 6192.51, "total_tokens": 1509696} +{"current_steps": 3075, "total_steps": 37885, "loss": 0.2528, "lr": 1.6225917128529954e-06, "epoch": 0.4058334433152963, "percentage": 8.12, "elapsed_time": "0:04:04", "remaining_time": "0:46:03", "throughput": 6194.4, "total_tokens": 1512192} +{"current_steps": 3080, "total_steps": 37885, "loss": 0.0691, "lr": 1.6252309316442331e-06, "epoch": 0.40649333509304475, "percentage": 8.13, "elapsed_time": "0:04:04", "remaining_time": "0:46:02", "throughput": 6196.55, "total_tokens": 1514752} +{"current_steps": 3085, "total_steps": 37885, "loss": 0.3173, "lr": 1.6278701504354711e-06, "epoch": 0.4071532268707932, "percentage": 8.14, "elapsed_time": "0:04:04", "remaining_time": "0:46:01", "throughput": 6199.06, "total_tokens": 1517440} +{"current_steps": 3090, "total_steps": 37885, "loss": 0.0856, "lr": 1.6305093692267089e-06, "epoch": 0.40781311864854164, "percentage": 8.16, "elapsed_time": "0:04:05", "remaining_time": "0:46:00", "throughput": 6200.28, "total_tokens": 1519744} +{"current_steps": 3095, "total_steps": 37885, "loss": 0.1111, "lr": 1.6331485880179465e-06, "epoch": 0.4084730104262901, "percentage": 8.17, "elapsed_time": "0:04:05", "remaining_time": "0:45:58", "throughput": 6201.86, "total_tokens": 1522176} +{"current_steps": 3100, "total_steps": 37885, "loss": 0.0694, "lr": 1.6357878068091844e-06, "epoch": 0.40913290220403853, "percentage": 8.18, "elapsed_time": "0:04:05", "remaining_time": "0:45:57", "throughput": 6202.54, "total_tokens": 1524352} +{"current_steps": 3105, "total_steps": 37885, "loss": 0.2666, "lr": 1.6384270256004222e-06, "epoch": 0.409792793981787, "percentage": 8.2, "elapsed_time": "0:04:06", "remaining_time": "0:45:56", "throughput": 6204.82, "total_tokens": 1526976} +{"current_steps": 3110, "total_steps": 37885, "loss": 0.1743, "lr": 1.64106624439166e-06, "epoch": 0.4104526857595354, "percentage": 8.21, "elapsed_time": "0:04:06", "remaining_time": "0:45:55", "throughput": 6205.45, "total_tokens": 1529152} +{"current_steps": 3115, "total_steps": 37885, "loss": 0.0033, "lr": 1.6437054631828978e-06, "epoch": 0.41111257753728386, "percentage": 8.22, "elapsed_time": "0:04:06", "remaining_time": "0:45:54", "throughput": 6207.33, "total_tokens": 1531648} +{"current_steps": 3120, "total_steps": 37885, "loss": 0.1017, "lr": 1.6463446819741355e-06, "epoch": 0.4117724693150323, "percentage": 8.24, "elapsed_time": "0:04:07", "remaining_time": "0:45:53", "throughput": 6208.5, "total_tokens": 1533952} +{"current_steps": 3125, "total_steps": 37885, "loss": 0.294, "lr": 1.6489839007653735e-06, "epoch": 0.4124323610927808, "percentage": 8.25, "elapsed_time": "0:04:07", "remaining_time": "0:45:51", "throughput": 6211.0, "total_tokens": 1536640} +{"current_steps": 3130, "total_steps": 37885, "loss": 0.0033, "lr": 1.6516231195566113e-06, "epoch": 0.41309225287052925, "percentage": 8.26, "elapsed_time": "0:04:07", "remaining_time": "0:45:50", "throughput": 6213.0, "total_tokens": 1539200} +{"current_steps": 3135, "total_steps": 37885, "loss": 0.1664, "lr": 1.6542623383478489e-06, "epoch": 0.4137521446482777, "percentage": 8.28, "elapsed_time": "0:04:08", "remaining_time": "0:45:49", "throughput": 6214.68, "total_tokens": 1541632} +{"current_steps": 3140, "total_steps": 37885, "loss": 0.0619, "lr": 1.6569015571390868e-06, "epoch": 0.41441203642602614, "percentage": 8.29, "elapsed_time": "0:04:08", "remaining_time": "0:45:48", "throughput": 6216.52, "total_tokens": 1544128} +{"current_steps": 3145, "total_steps": 37885, "loss": 0.0231, "lr": 1.6595407759303246e-06, "epoch": 0.4150719282037746, "percentage": 8.3, "elapsed_time": "0:04:08", "remaining_time": "0:45:47", "throughput": 6217.42, "total_tokens": 1546368} +{"current_steps": 3150, "total_steps": 37885, "loss": 0.4345, "lr": 1.6621799947215624e-06, "epoch": 0.41573181998152303, "percentage": 8.31, "elapsed_time": "0:04:09", "remaining_time": "0:45:46", "throughput": 6218.83, "total_tokens": 1548736} +{"current_steps": 3155, "total_steps": 37885, "loss": 0.2605, "lr": 1.6648192135128002e-06, "epoch": 0.4163917117592715, "percentage": 8.33, "elapsed_time": "0:04:09", "remaining_time": "0:45:45", "throughput": 6220.43, "total_tokens": 1551168} +{"current_steps": 3160, "total_steps": 37885, "loss": 0.1204, "lr": 1.667458432304038e-06, "epoch": 0.4170516035370199, "percentage": 8.34, "elapsed_time": "0:04:09", "remaining_time": "0:45:43", "throughput": 6222.2, "total_tokens": 1553664} +{"current_steps": 3165, "total_steps": 37885, "loss": 0.1409, "lr": 1.6700976510952757e-06, "epoch": 0.41771149531476837, "percentage": 8.35, "elapsed_time": "0:04:10", "remaining_time": "0:45:42", "throughput": 6223.35, "total_tokens": 1555968} +{"current_steps": 3170, "total_steps": 37885, "loss": 0.064, "lr": 1.6727368698865135e-06, "epoch": 0.4183713870925168, "percentage": 8.37, "elapsed_time": "0:04:10", "remaining_time": "0:45:41", "throughput": 6224.25, "total_tokens": 1558208} +{"current_steps": 3175, "total_steps": 37885, "loss": 0.0094, "lr": 1.6753760886777513e-06, "epoch": 0.41903127887026526, "percentage": 8.38, "elapsed_time": "0:04:10", "remaining_time": "0:45:40", "throughput": 6225.84, "total_tokens": 1560640} +{"current_steps": 3180, "total_steps": 37885, "loss": 0.0375, "lr": 1.678015307468989e-06, "epoch": 0.4196911706480137, "percentage": 8.39, "elapsed_time": "0:04:10", "remaining_time": "0:45:39", "throughput": 6228.4, "total_tokens": 1563328} +{"current_steps": 3185, "total_steps": 37885, "loss": 0.1175, "lr": 1.680654526260227e-06, "epoch": 0.4203510624257622, "percentage": 8.41, "elapsed_time": "0:04:11", "remaining_time": "0:45:38", "throughput": 6229.73, "total_tokens": 1565696} +{"current_steps": 3190, "total_steps": 37885, "loss": 0.4013, "lr": 1.6832937450514646e-06, "epoch": 0.42101095420351065, "percentage": 8.42, "elapsed_time": "0:04:11", "remaining_time": "0:45:37", "throughput": 6233.09, "total_tokens": 1568640} +{"current_steps": 3195, "total_steps": 37885, "loss": 0.0542, "lr": 1.6859329638427023e-06, "epoch": 0.4216708459812591, "percentage": 8.43, "elapsed_time": "0:04:11", "remaining_time": "0:45:36", "throughput": 6235.53, "total_tokens": 1571328} +{"current_steps": 3200, "total_steps": 37885, "loss": 0.5376, "lr": 1.6885721826339403e-06, "epoch": 0.42233073775900754, "percentage": 8.45, "elapsed_time": "0:04:12", "remaining_time": "0:45:35", "throughput": 6237.95, "total_tokens": 1574016} +{"current_steps": 3205, "total_steps": 37885, "loss": 0.2516, "lr": 1.691211401425178e-06, "epoch": 0.422990629536756, "percentage": 8.46, "elapsed_time": "0:04:12", "remaining_time": "0:45:33", "throughput": 6239.21, "total_tokens": 1576384} +{"current_steps": 3210, "total_steps": 37885, "loss": 0.217, "lr": 1.6938506202164159e-06, "epoch": 0.4236505213145044, "percentage": 8.47, "elapsed_time": "0:04:12", "remaining_time": "0:45:32", "throughput": 6242.06, "total_tokens": 1579200} +{"current_steps": 3215, "total_steps": 37885, "loss": 0.1522, "lr": 1.6964898390076536e-06, "epoch": 0.42431041309225287, "percentage": 8.49, "elapsed_time": "0:04:13", "remaining_time": "0:45:31", "throughput": 6243.6, "total_tokens": 1581632} +{"current_steps": 3220, "total_steps": 37885, "loss": 0.0283, "lr": 1.6991290577988914e-06, "epoch": 0.4249703048700013, "percentage": 8.5, "elapsed_time": "0:04:13", "remaining_time": "0:45:30", "throughput": 6245.07, "total_tokens": 1584064} +{"current_steps": 3225, "total_steps": 37885, "loss": 0.0425, "lr": 1.7017682765901294e-06, "epoch": 0.42563019664774976, "percentage": 8.51, "elapsed_time": "0:04:13", "remaining_time": "0:45:29", "throughput": 6247.53, "total_tokens": 1586752} +{"current_steps": 3230, "total_steps": 37885, "loss": 0.1471, "lr": 1.704407495381367e-06, "epoch": 0.4262900884254982, "percentage": 8.53, "elapsed_time": "0:04:14", "remaining_time": "0:45:28", "throughput": 6249.29, "total_tokens": 1589248} +{"current_steps": 3235, "total_steps": 37885, "loss": 0.0052, "lr": 1.7070467141726047e-06, "epoch": 0.42694998020324665, "percentage": 8.54, "elapsed_time": "0:04:14", "remaining_time": "0:45:27", "throughput": 6250.38, "total_tokens": 1591552} +{"current_steps": 3240, "total_steps": 37885, "loss": 0.0685, "lr": 1.7096859329638427e-06, "epoch": 0.4276098719809951, "percentage": 8.55, "elapsed_time": "0:04:14", "remaining_time": "0:45:26", "throughput": 6252.14, "total_tokens": 1594048} +{"current_steps": 3245, "total_steps": 37885, "loss": 0.2108, "lr": 1.7123251517550805e-06, "epoch": 0.4282697637587436, "percentage": 8.57, "elapsed_time": "0:04:15", "remaining_time": "0:45:25", "throughput": 6253.89, "total_tokens": 1596544} +{"current_steps": 3250, "total_steps": 37885, "loss": 0.2884, "lr": 1.714964370546318e-06, "epoch": 0.42892965553649204, "percentage": 8.58, "elapsed_time": "0:04:15", "remaining_time": "0:45:24", "throughput": 6255.89, "total_tokens": 1599104} +{"current_steps": 3255, "total_steps": 37885, "loss": 0.1717, "lr": 1.717603589337556e-06, "epoch": 0.4295895473142405, "percentage": 8.59, "elapsed_time": "0:04:15", "remaining_time": "0:45:22", "throughput": 6256.67, "total_tokens": 1601344} +{"current_steps": 3260, "total_steps": 37885, "loss": 0.0818, "lr": 1.7202428081287938e-06, "epoch": 0.43024943909198893, "percentage": 8.6, "elapsed_time": "0:04:16", "remaining_time": "0:45:21", "throughput": 6258.83, "total_tokens": 1603968} +{"current_steps": 3265, "total_steps": 37885, "loss": 0.0261, "lr": 1.7228820269200314e-06, "epoch": 0.4309093308697374, "percentage": 8.62, "elapsed_time": "0:04:16", "remaining_time": "0:45:20", "throughput": 6260.32, "total_tokens": 1606400} +{"current_steps": 3270, "total_steps": 37885, "loss": 0.2814, "lr": 1.7255212457112694e-06, "epoch": 0.4315692226474858, "percentage": 8.63, "elapsed_time": "0:04:16", "remaining_time": "0:45:19", "throughput": 6262.23, "total_tokens": 1608960} +{"current_steps": 3275, "total_steps": 37885, "loss": 0.0021, "lr": 1.7281604645025071e-06, "epoch": 0.43222911442523426, "percentage": 8.64, "elapsed_time": "0:04:17", "remaining_time": "0:45:18", "throughput": 6263.96, "total_tokens": 1611456} +{"current_steps": 3280, "total_steps": 37885, "loss": 0.0595, "lr": 1.7307996832937451e-06, "epoch": 0.4328890062029827, "percentage": 8.66, "elapsed_time": "0:04:17", "remaining_time": "0:45:17", "throughput": 6265.68, "total_tokens": 1613952} +{"current_steps": 3285, "total_steps": 37885, "loss": 0.1862, "lr": 1.7334389020849827e-06, "epoch": 0.43354889798073115, "percentage": 8.67, "elapsed_time": "0:04:17", "remaining_time": "0:45:16", "throughput": 6266.99, "total_tokens": 1616320} +{"current_steps": 3290, "total_steps": 37885, "loss": 0.3119, "lr": 1.7360781208762205e-06, "epoch": 0.4342087897584796, "percentage": 8.68, "elapsed_time": "0:04:18", "remaining_time": "0:45:15", "throughput": 6269.8, "total_tokens": 1619136} +{"current_steps": 3295, "total_steps": 37885, "loss": 0.0999, "lr": 1.7387173396674584e-06, "epoch": 0.43486868153622804, "percentage": 8.7, "elapsed_time": "0:04:18", "remaining_time": "0:45:14", "throughput": 6271.49, "total_tokens": 1621632} +{"current_steps": 3300, "total_steps": 37885, "loss": 0.1682, "lr": 1.7413565584586962e-06, "epoch": 0.4355285733139765, "percentage": 8.71, "elapsed_time": "0:04:18", "remaining_time": "0:45:13", "throughput": 6272.92, "total_tokens": 1624064} +{"current_steps": 3305, "total_steps": 37885, "loss": 0.1445, "lr": 1.7439957772499338e-06, "epoch": 0.43618846509172493, "percentage": 8.72, "elapsed_time": "0:04:19", "remaining_time": "0:45:12", "throughput": 6274.4, "total_tokens": 1626496} +{"current_steps": 3310, "total_steps": 37885, "loss": 0.1393, "lr": 1.7466349960411718e-06, "epoch": 0.43684835686947343, "percentage": 8.74, "elapsed_time": "0:04:19", "remaining_time": "0:45:11", "throughput": 6275.38, "total_tokens": 1628800} +{"current_steps": 3315, "total_steps": 37885, "loss": 0.1456, "lr": 1.7492742148324095e-06, "epoch": 0.4375082486472219, "percentage": 8.75, "elapsed_time": "0:04:19", "remaining_time": "0:45:10", "throughput": 6276.85, "total_tokens": 1631232} +{"current_steps": 3320, "total_steps": 37885, "loss": 0.0401, "lr": 1.7519134336236473e-06, "epoch": 0.4381681404249703, "percentage": 8.76, "elapsed_time": "0:04:20", "remaining_time": "0:45:09", "throughput": 6278.44, "total_tokens": 1633728} +{"current_steps": 3325, "total_steps": 37885, "loss": 0.1055, "lr": 1.754552652414885e-06, "epoch": 0.43882803220271877, "percentage": 8.78, "elapsed_time": "0:04:20", "remaining_time": "0:45:08", "throughput": 6280.15, "total_tokens": 1636224} +{"current_steps": 3330, "total_steps": 37885, "loss": 0.0655, "lr": 1.7571918712061229e-06, "epoch": 0.4394879239804672, "percentage": 8.79, "elapsed_time": "0:04:20", "remaining_time": "0:45:06", "throughput": 6280.12, "total_tokens": 1638272} +{"current_steps": 3335, "total_steps": 37885, "loss": 0.0947, "lr": 1.7598310899973608e-06, "epoch": 0.44014781575821565, "percentage": 8.8, "elapsed_time": "0:04:21", "remaining_time": "0:45:05", "throughput": 6280.77, "total_tokens": 1640512} +{"current_steps": 3340, "total_steps": 37885, "loss": 0.0818, "lr": 1.7624703087885986e-06, "epoch": 0.4408077075359641, "percentage": 8.82, "elapsed_time": "0:04:21", "remaining_time": "0:45:04", "throughput": 6282.18, "total_tokens": 1642944} +{"current_steps": 3345, "total_steps": 37885, "loss": 0.0882, "lr": 1.7651095275798362e-06, "epoch": 0.44146759931371254, "percentage": 8.83, "elapsed_time": "0:04:21", "remaining_time": "0:45:03", "throughput": 6283.56, "total_tokens": 1645376} +{"current_steps": 3350, "total_steps": 37885, "loss": 0.0007, "lr": 1.7677487463710742e-06, "epoch": 0.442127491091461, "percentage": 8.84, "elapsed_time": "0:04:22", "remaining_time": "0:45:02", "throughput": 6285.42, "total_tokens": 1647936} +{"current_steps": 3355, "total_steps": 37885, "loss": 0.1307, "lr": 1.770387965162312e-06, "epoch": 0.44278738286920943, "percentage": 8.86, "elapsed_time": "0:04:22", "remaining_time": "0:45:01", "throughput": 6287.26, "total_tokens": 1650496} +{"current_steps": 3360, "total_steps": 37885, "loss": 0.1083, "lr": 1.7730271839535497e-06, "epoch": 0.4434472746469579, "percentage": 8.87, "elapsed_time": "0:04:22", "remaining_time": "0:45:00", "throughput": 6288.88, "total_tokens": 1652992} +{"current_steps": 3365, "total_steps": 37885, "loss": 0.0738, "lr": 1.7756664027447875e-06, "epoch": 0.4441071664247063, "percentage": 8.88, "elapsed_time": "0:04:23", "remaining_time": "0:44:59", "throughput": 6290.5, "total_tokens": 1655488} +{"current_steps": 3370, "total_steps": 37885, "loss": 0.1521, "lr": 1.7783056215360253e-06, "epoch": 0.4447670582024548, "percentage": 8.9, "elapsed_time": "0:04:23", "remaining_time": "0:44:58", "throughput": 6292.09, "total_tokens": 1657984} +{"current_steps": 3375, "total_steps": 37885, "loss": 0.2652, "lr": 1.780944840327263e-06, "epoch": 0.44542694998020327, "percentage": 8.91, "elapsed_time": "0:04:23", "remaining_time": "0:44:57", "throughput": 6294.49, "total_tokens": 1660736} +{"current_steps": 3380, "total_steps": 37885, "loss": 0.1661, "lr": 1.7835840591185008e-06, "epoch": 0.4460868417579517, "percentage": 8.92, "elapsed_time": "0:04:24", "remaining_time": "0:44:56", "throughput": 6295.6, "total_tokens": 1663104} +{"current_steps": 3385, "total_steps": 37885, "loss": 0.0768, "lr": 1.7862232779097386e-06, "epoch": 0.44674673353570016, "percentage": 8.93, "elapsed_time": "0:04:24", "remaining_time": "0:44:55", "throughput": 6296.27, "total_tokens": 1665344} +{"current_steps": 3390, "total_steps": 37885, "loss": 0.1197, "lr": 1.7888624967009763e-06, "epoch": 0.4474066253134486, "percentage": 8.95, "elapsed_time": "0:04:24", "remaining_time": "0:44:54", "throughput": 6298.72, "total_tokens": 1668096} +{"current_steps": 3395, "total_steps": 37885, "loss": 0.0718, "lr": 1.7915017154922143e-06, "epoch": 0.44806651709119705, "percentage": 8.96, "elapsed_time": "0:04:25", "remaining_time": "0:44:53", "throughput": 6299.16, "total_tokens": 1670272} +{"current_steps": 3400, "total_steps": 37885, "loss": 0.1287, "lr": 1.794140934283452e-06, "epoch": 0.4487264088689455, "percentage": 8.97, "elapsed_time": "0:04:25", "remaining_time": "0:44:52", "throughput": 6299.57, "total_tokens": 1672448} +{"current_steps": 3405, "total_steps": 37885, "loss": 0.0269, "lr": 1.7967801530746897e-06, "epoch": 0.44938630064669394, "percentage": 8.99, "elapsed_time": "0:04:25", "remaining_time": "0:44:51", "throughput": 6301.92, "total_tokens": 1675200} +{"current_steps": 3410, "total_steps": 37885, "loss": 0.1333, "lr": 1.7994193718659277e-06, "epoch": 0.4500461924244424, "percentage": 9.0, "elapsed_time": "0:04:26", "remaining_time": "0:44:50", "throughput": 6303.44, "total_tokens": 1677696} +{"current_steps": 3415, "total_steps": 37885, "loss": 0.0596, "lr": 1.8020585906571654e-06, "epoch": 0.4507060842021908, "percentage": 9.01, "elapsed_time": "0:04:26", "remaining_time": "0:44:49", "throughput": 6305.2, "total_tokens": 1680256} +{"current_steps": 3420, "total_steps": 37885, "loss": 0.085, "lr": 1.8046978094484032e-06, "epoch": 0.45136597597993927, "percentage": 9.03, "elapsed_time": "0:04:26", "remaining_time": "0:44:48", "throughput": 6306.25, "total_tokens": 1682624} +{"current_steps": 3425, "total_steps": 37885, "loss": 0.0495, "lr": 1.807337028239641e-06, "epoch": 0.4520258677576877, "percentage": 9.04, "elapsed_time": "0:04:27", "remaining_time": "0:44:47", "throughput": 6307.59, "total_tokens": 1685056} +{"current_steps": 3430, "total_steps": 37885, "loss": 0.1929, "lr": 1.8099762470308787e-06, "epoch": 0.45268575953543616, "percentage": 9.05, "elapsed_time": "0:04:27", "remaining_time": "0:44:46", "throughput": 6307.79, "total_tokens": 1687168} +{"current_steps": 3435, "total_steps": 37885, "loss": 0.277, "lr": 1.8126154658221167e-06, "epoch": 0.45334565131318466, "percentage": 9.07, "elapsed_time": "0:04:27", "remaining_time": "0:44:45", "throughput": 6307.72, "total_tokens": 1689216} +{"current_steps": 3440, "total_steps": 37885, "loss": 0.0305, "lr": 1.8152546846133543e-06, "epoch": 0.4540055430909331, "percentage": 9.08, "elapsed_time": "0:04:28", "remaining_time": "0:44:44", "throughput": 6309.5, "total_tokens": 1691776} +{"current_steps": 3445, "total_steps": 37885, "loss": 0.0776, "lr": 1.817893903404592e-06, "epoch": 0.45466543486868155, "percentage": 9.09, "elapsed_time": "0:04:28", "remaining_time": "0:44:43", "throughput": 6311.24, "total_tokens": 1694336} +{"current_steps": 3450, "total_steps": 37885, "loss": 0.2252, "lr": 1.82053312219583e-06, "epoch": 0.45532532664643, "percentage": 9.11, "elapsed_time": "0:04:28", "remaining_time": "0:44:42", "throughput": 6312.11, "total_tokens": 1696640} +{"current_steps": 3455, "total_steps": 37885, "loss": 0.0788, "lr": 1.8231723409870678e-06, "epoch": 0.45598521842417844, "percentage": 9.12, "elapsed_time": "0:04:29", "remaining_time": "0:44:41", "throughput": 6312.68, "total_tokens": 1698880} +{"current_steps": 3460, "total_steps": 37885, "loss": 0.2611, "lr": 1.8258115597783054e-06, "epoch": 0.4566451102019269, "percentage": 9.13, "elapsed_time": "0:04:29", "remaining_time": "0:44:40", "throughput": 6313.97, "total_tokens": 1701312} +{"current_steps": 3465, "total_steps": 37885, "loss": 0.2096, "lr": 1.8284507785695434e-06, "epoch": 0.45730500197967533, "percentage": 9.15, "elapsed_time": "0:04:29", "remaining_time": "0:44:39", "throughput": 6315.42, "total_tokens": 1703808} +{"current_steps": 3470, "total_steps": 37885, "loss": 0.0747, "lr": 1.8310899973607811e-06, "epoch": 0.4579648937574238, "percentage": 9.16, "elapsed_time": "0:04:30", "remaining_time": "0:44:38", "throughput": 6316.89, "total_tokens": 1706304} +{"current_steps": 3475, "total_steps": 37885, "loss": 0.3095, "lr": 1.8337292161520187e-06, "epoch": 0.4586247855351722, "percentage": 9.17, "elapsed_time": "0:04:30", "remaining_time": "0:44:37", "throughput": 6318.22, "total_tokens": 1708736} +{"current_steps": 3480, "total_steps": 37885, "loss": 0.1241, "lr": 1.8363684349432567e-06, "epoch": 0.45928467731292066, "percentage": 9.19, "elapsed_time": "0:04:30", "remaining_time": "0:44:37", "throughput": 6319.73, "total_tokens": 1711232} +{"current_steps": 3485, "total_steps": 37885, "loss": 0.0022, "lr": 1.8390076537344945e-06, "epoch": 0.4599445690906691, "percentage": 9.2, "elapsed_time": "0:04:31", "remaining_time": "0:44:36", "throughput": 6320.75, "total_tokens": 1713600} +{"current_steps": 3490, "total_steps": 37885, "loss": 0.1762, "lr": 1.8416468725257325e-06, "epoch": 0.46060446086841755, "percentage": 9.21, "elapsed_time": "0:04:31", "remaining_time": "0:44:35", "throughput": 6322.73, "total_tokens": 1716224} +{"current_steps": 3495, "total_steps": 37885, "loss": 0.2366, "lr": 1.84428609131697e-06, "epoch": 0.46126435264616605, "percentage": 9.23, "elapsed_time": "0:04:31", "remaining_time": "0:44:34", "throughput": 6324.22, "total_tokens": 1718720} +{"current_steps": 3500, "total_steps": 37885, "loss": 0.2068, "lr": 1.8469253101082078e-06, "epoch": 0.4619242444239145, "percentage": 9.24, "elapsed_time": "0:04:32", "remaining_time": "0:44:33", "throughput": 6325.94, "total_tokens": 1721280} +{"current_steps": 3505, "total_steps": 37885, "loss": 0.0785, "lr": 1.8495645288994458e-06, "epoch": 0.46258413620166294, "percentage": 9.25, "elapsed_time": "0:04:32", "remaining_time": "0:44:32", "throughput": 6327.22, "total_tokens": 1723712} +{"current_steps": 3510, "total_steps": 37885, "loss": 0.1624, "lr": 1.8522037476906835e-06, "epoch": 0.4632440279794114, "percentage": 9.26, "elapsed_time": "0:04:32", "remaining_time": "0:44:31", "throughput": 6327.85, "total_tokens": 1725952} +{"current_steps": 3515, "total_steps": 37885, "loss": 0.0022, "lr": 1.8548429664819211e-06, "epoch": 0.46390391975715983, "percentage": 9.28, "elapsed_time": "0:04:33", "remaining_time": "0:44:30", "throughput": 6329.53, "total_tokens": 1728512} +{"current_steps": 3520, "total_steps": 37885, "loss": 0.1997, "lr": 1.857482185273159e-06, "epoch": 0.4645638115349083, "percentage": 9.29, "elapsed_time": "0:04:33", "remaining_time": "0:44:29", "throughput": 6331.01, "total_tokens": 1731008} +{"current_steps": 3525, "total_steps": 37885, "loss": 0.1817, "lr": 1.8601214040643969e-06, "epoch": 0.4652237033126567, "percentage": 9.3, "elapsed_time": "0:04:33", "remaining_time": "0:44:28", "throughput": 6333.12, "total_tokens": 1733696} +{"current_steps": 3530, "total_steps": 37885, "loss": 0.0807, "lr": 1.8627606228556346e-06, "epoch": 0.46588359509040517, "percentage": 9.32, "elapsed_time": "0:04:34", "remaining_time": "0:44:27", "throughput": 6334.88, "total_tokens": 1736256} +{"current_steps": 3535, "total_steps": 37885, "loss": 0.2405, "lr": 1.8653998416468724e-06, "epoch": 0.4665434868681536, "percentage": 9.33, "elapsed_time": "0:04:34", "remaining_time": "0:44:26", "throughput": 6337.01, "total_tokens": 1738944} +{"current_steps": 3540, "total_steps": 37885, "loss": 0.207, "lr": 1.8680390604381102e-06, "epoch": 0.46720337864590206, "percentage": 9.34, "elapsed_time": "0:04:34", "remaining_time": "0:44:25", "throughput": 6338.95, "total_tokens": 1741568} +{"current_steps": 3545, "total_steps": 37885, "loss": 0.0024, "lr": 1.8706782792293482e-06, "epoch": 0.4678632704236505, "percentage": 9.36, "elapsed_time": "0:04:35", "remaining_time": "0:44:24", "throughput": 6339.52, "total_tokens": 1743808} +{"current_steps": 3550, "total_steps": 37885, "loss": 0.035, "lr": 1.873317498020586e-06, "epoch": 0.46852316220139895, "percentage": 9.37, "elapsed_time": "0:04:35", "remaining_time": "0:44:23", "throughput": 6340.64, "total_tokens": 1746176} +{"current_steps": 3555, "total_steps": 37885, "loss": 0.214, "lr": 1.8759567168118235e-06, "epoch": 0.46918305397914745, "percentage": 9.38, "elapsed_time": "0:04:35", "remaining_time": "0:44:22", "throughput": 6341.88, "total_tokens": 1748608} +{"current_steps": 3560, "total_steps": 37885, "loss": 0.0983, "lr": 1.8785959356030615e-06, "epoch": 0.4698429457568959, "percentage": 9.4, "elapsed_time": "0:04:36", "remaining_time": "0:44:21", "throughput": 6343.21, "total_tokens": 1751040} +{"current_steps": 3565, "total_steps": 37885, "loss": 0.1472, "lr": 1.8812351543942993e-06, "epoch": 0.47050283753464434, "percentage": 9.41, "elapsed_time": "0:04:36", "remaining_time": "0:44:20", "throughput": 6343.82, "total_tokens": 1753280} +{"current_steps": 3570, "total_steps": 37885, "loss": 0.4087, "lr": 1.883874373185537e-06, "epoch": 0.4711627293123928, "percentage": 9.42, "elapsed_time": "0:04:36", "remaining_time": "0:44:19", "throughput": 6345.15, "total_tokens": 1755712} +{"current_steps": 3575, "total_steps": 37885, "loss": 0.1291, "lr": 1.8865135919767748e-06, "epoch": 0.4718226210901412, "percentage": 9.44, "elapsed_time": "0:04:37", "remaining_time": "0:44:18", "throughput": 6345.75, "total_tokens": 1757952} +{"current_steps": 3580, "total_steps": 37885, "loss": 0.0719, "lr": 1.8891528107680126e-06, "epoch": 0.47248251286788967, "percentage": 9.45, "elapsed_time": "0:04:37", "remaining_time": "0:44:17", "throughput": 6346.93, "total_tokens": 1760384} +{"current_steps": 3585, "total_steps": 37885, "loss": 0.0011, "lr": 1.8917920295592504e-06, "epoch": 0.4731424046456381, "percentage": 9.46, "elapsed_time": "0:04:37", "remaining_time": "0:44:16", "throughput": 6348.54, "total_tokens": 1762944} +{"current_steps": 3590, "total_steps": 37885, "loss": 0.2399, "lr": 1.8944312483504881e-06, "epoch": 0.47380229642338656, "percentage": 9.48, "elapsed_time": "0:04:38", "remaining_time": "0:44:15", "throughput": 6350.3, "total_tokens": 1765504} +{"current_steps": 3595, "total_steps": 37885, "loss": 0.0452, "lr": 1.897070467141726e-06, "epoch": 0.474462188201135, "percentage": 9.49, "elapsed_time": "0:04:38", "remaining_time": "0:44:14", "throughput": 6352.17, "total_tokens": 1768128} +{"current_steps": 3600, "total_steps": 37885, "loss": 0.1898, "lr": 1.8997096859329637e-06, "epoch": 0.47512207997888345, "percentage": 9.5, "elapsed_time": "0:04:38", "remaining_time": "0:44:14", "throughput": 6353.59, "total_tokens": 1770624} +{"current_steps": 3605, "total_steps": 37885, "loss": 0.329, "lr": 1.9023489047242017e-06, "epoch": 0.4757819717566319, "percentage": 9.52, "elapsed_time": "0:04:39", "remaining_time": "0:44:13", "throughput": 6355.46, "total_tokens": 1773248} +{"current_steps": 3610, "total_steps": 37885, "loss": 0.0657, "lr": 1.9049881235154392e-06, "epoch": 0.47644186353438034, "percentage": 9.53, "elapsed_time": "0:04:39", "remaining_time": "0:44:12", "throughput": 6356.08, "total_tokens": 1775488} +{"current_steps": 3615, "total_steps": 37885, "loss": 0.1438, "lr": 1.907627342306677e-06, "epoch": 0.4771017553121288, "percentage": 9.54, "elapsed_time": "0:04:39", "remaining_time": "0:44:11", "throughput": 6356.87, "total_tokens": 1777792} +{"current_steps": 3620, "total_steps": 37885, "loss": 0.0654, "lr": 1.9102665610979148e-06, "epoch": 0.4777616470898773, "percentage": 9.56, "elapsed_time": "0:04:39", "remaining_time": "0:44:10", "throughput": 6358.5, "total_tokens": 1780352} +{"current_steps": 3625, "total_steps": 37885, "loss": 0.2003, "lr": 1.9129057798891528e-06, "epoch": 0.47842153886762573, "percentage": 9.57, "elapsed_time": "0:04:40", "remaining_time": "0:44:09", "throughput": 6360.9, "total_tokens": 1783168} +{"current_steps": 3630, "total_steps": 37885, "loss": 0.1582, "lr": 1.9155449986803903e-06, "epoch": 0.4790814306453742, "percentage": 9.58, "elapsed_time": "0:04:40", "remaining_time": "0:44:08", "throughput": 6363.16, "total_tokens": 1785920} +{"current_steps": 3635, "total_steps": 37885, "loss": 0.1929, "lr": 1.9181842174716283e-06, "epoch": 0.4797413224231226, "percentage": 9.59, "elapsed_time": "0:04:40", "remaining_time": "0:44:07", "throughput": 6364.61, "total_tokens": 1788416} +{"current_steps": 3640, "total_steps": 37885, "loss": 0.0412, "lr": 1.9208234362628663e-06, "epoch": 0.48040121420087106, "percentage": 9.61, "elapsed_time": "0:04:41", "remaining_time": "0:44:06", "throughput": 6365.77, "total_tokens": 1790848} +{"current_steps": 3645, "total_steps": 37885, "loss": 0.0349, "lr": 1.923462655054104e-06, "epoch": 0.4810611059786195, "percentage": 9.62, "elapsed_time": "0:04:41", "remaining_time": "0:44:05", "throughput": 6366.97, "total_tokens": 1793280} +{"current_steps": 3650, "total_steps": 37885, "loss": 0.209, "lr": 1.9261018738453414e-06, "epoch": 0.48172099775636795, "percentage": 9.63, "elapsed_time": "0:04:41", "remaining_time": "0:44:04", "throughput": 6367.94, "total_tokens": 1795648} +{"current_steps": 3655, "total_steps": 37885, "loss": 0.1709, "lr": 1.9287410926365794e-06, "epoch": 0.4823808895341164, "percentage": 9.65, "elapsed_time": "0:04:42", "remaining_time": "0:44:03", "throughput": 6368.67, "total_tokens": 1797952} +{"current_steps": 3660, "total_steps": 37885, "loss": 0.099, "lr": 1.9313803114278174e-06, "epoch": 0.48304078131186484, "percentage": 9.66, "elapsed_time": "0:04:42", "remaining_time": "0:44:02", "throughput": 6369.19, "total_tokens": 1800192} +{"current_steps": 3665, "total_steps": 37885, "loss": 0.1852, "lr": 1.9340195302190554e-06, "epoch": 0.4837006730896133, "percentage": 9.67, "elapsed_time": "0:04:42", "remaining_time": "0:44:02", "throughput": 6370.15, "total_tokens": 1802560} +{"current_steps": 3670, "total_steps": 37885, "loss": 0.2034, "lr": 1.936658749010293e-06, "epoch": 0.48436056486736173, "percentage": 9.69, "elapsed_time": "0:04:43", "remaining_time": "0:44:01", "throughput": 6371.5, "total_tokens": 1805056} +{"current_steps": 3675, "total_steps": 37885, "loss": 0.2424, "lr": 1.9392979678015305e-06, "epoch": 0.4850204566451102, "percentage": 9.7, "elapsed_time": "0:04:43", "remaining_time": "0:44:00", "throughput": 6373.52, "total_tokens": 1807744} +{"current_steps": 3680, "total_steps": 37885, "loss": 0.1529, "lr": 1.9419371865927685e-06, "epoch": 0.4856803484228587, "percentage": 9.71, "elapsed_time": "0:04:43", "remaining_time": "0:43:59", "throughput": 6375.31, "total_tokens": 1810368} +{"current_steps": 3685, "total_steps": 37885, "loss": 0.0411, "lr": 1.944576405384006e-06, "epoch": 0.4863402402006071, "percentage": 9.73, "elapsed_time": "0:04:44", "remaining_time": "0:43:58", "throughput": 6377.31, "total_tokens": 1813056} +{"current_steps": 3690, "total_steps": 37885, "loss": 0.2287, "lr": 1.947215624175244e-06, "epoch": 0.48700013197835557, "percentage": 9.74, "elapsed_time": "0:04:44", "remaining_time": "0:43:57", "throughput": 6377.92, "total_tokens": 1815360} +{"current_steps": 3695, "total_steps": 37885, "loss": 0.1827, "lr": 1.949854842966482e-06, "epoch": 0.487660023756104, "percentage": 9.75, "elapsed_time": "0:04:44", "remaining_time": "0:43:56", "throughput": 6379.49, "total_tokens": 1817920} +{"current_steps": 3700, "total_steps": 37885, "loss": 0.0576, "lr": 1.9524940617577196e-06, "epoch": 0.48831991553385246, "percentage": 9.77, "elapsed_time": "0:04:45", "remaining_time": "0:43:55", "throughput": 6380.35, "total_tokens": 1820288} +{"current_steps": 3705, "total_steps": 37885, "loss": 0.1, "lr": 1.955133280548957e-06, "epoch": 0.4889798073116009, "percentage": 9.78, "elapsed_time": "0:04:45", "remaining_time": "0:43:54", "throughput": 6381.33, "total_tokens": 1822656} +{"current_steps": 3710, "total_steps": 37885, "loss": 0.1911, "lr": 1.957772499340195e-06, "epoch": 0.48963969908934935, "percentage": 9.79, "elapsed_time": "0:04:45", "remaining_time": "0:43:54", "throughput": 6381.64, "total_tokens": 1824832} +{"current_steps": 3715, "total_steps": 37885, "loss": 0.1059, "lr": 1.960411718131433e-06, "epoch": 0.4902995908670978, "percentage": 9.81, "elapsed_time": "0:04:46", "remaining_time": "0:43:53", "throughput": 6382.6, "total_tokens": 1827200} +{"current_steps": 3720, "total_steps": 37885, "loss": 0.1355, "lr": 1.963050936922671e-06, "epoch": 0.49095948264484623, "percentage": 9.82, "elapsed_time": "0:04:46", "remaining_time": "0:43:52", "throughput": 6383.86, "total_tokens": 1829632} +{"current_steps": 3725, "total_steps": 37885, "loss": 0.0601, "lr": 1.9656901557139086e-06, "epoch": 0.4916193744225947, "percentage": 9.83, "elapsed_time": "0:04:46", "remaining_time": "0:43:51", "throughput": 6385.49, "total_tokens": 1832192} +{"current_steps": 3730, "total_steps": 37885, "loss": 0.1779, "lr": 1.968329374505146e-06, "epoch": 0.4922792662003431, "percentage": 9.85, "elapsed_time": "0:04:47", "remaining_time": "0:43:50", "throughput": 6387.06, "total_tokens": 1834752} +{"current_steps": 3735, "total_steps": 37885, "loss": 0.0968, "lr": 1.970968593296384e-06, "epoch": 0.49293915797809157, "percentage": 9.86, "elapsed_time": "0:04:47", "remaining_time": "0:43:49", "throughput": 6388.46, "total_tokens": 1837248} +{"current_steps": 3740, "total_steps": 37885, "loss": 0.2923, "lr": 1.973607812087622e-06, "epoch": 0.49359904975584007, "percentage": 9.87, "elapsed_time": "0:04:47", "remaining_time": "0:43:48", "throughput": 6389.56, "total_tokens": 1839680} +{"current_steps": 3745, "total_steps": 37885, "loss": 0.115, "lr": 1.9762470308788597e-06, "epoch": 0.4942589415335885, "percentage": 9.89, "elapsed_time": "0:04:48", "remaining_time": "0:43:47", "throughput": 6391.41, "total_tokens": 1842304} +{"current_steps": 3750, "total_steps": 37885, "loss": 0.0656, "lr": 1.9788862496700977e-06, "epoch": 0.49491883331133696, "percentage": 9.9, "elapsed_time": "0:04:48", "remaining_time": "0:43:46", "throughput": 6391.76, "total_tokens": 1844480} +{"current_steps": 3755, "total_steps": 37885, "loss": 0.0883, "lr": 1.9815254684613353e-06, "epoch": 0.4955787250890854, "percentage": 9.91, "elapsed_time": "0:04:48", "remaining_time": "0:43:45", "throughput": 6392.95, "total_tokens": 1846912} +{"current_steps": 3760, "total_steps": 37885, "loss": 0.1242, "lr": 1.9841646872525733e-06, "epoch": 0.49623861686683385, "percentage": 9.92, "elapsed_time": "0:04:49", "remaining_time": "0:43:44", "throughput": 6393.08, "total_tokens": 1849024} +{"current_steps": 3765, "total_steps": 37885, "loss": 0.0576, "lr": 1.986803906043811e-06, "epoch": 0.4968985086445823, "percentage": 9.94, "elapsed_time": "0:04:49", "remaining_time": "0:43:44", "throughput": 6395.04, "total_tokens": 1851712} +{"current_steps": 3770, "total_steps": 37885, "loss": 0.1076, "lr": 1.989443124835049e-06, "epoch": 0.49755840042233074, "percentage": 9.95, "elapsed_time": "0:04:49", "remaining_time": "0:43:43", "throughput": 6396.4, "total_tokens": 1854208} +{"current_steps": 3775, "total_steps": 37885, "loss": 0.0448, "lr": 1.9920823436262864e-06, "epoch": 0.4982182922000792, "percentage": 9.96, "elapsed_time": "0:04:50", "remaining_time": "0:43:42", "throughput": 6397.76, "total_tokens": 1856704} +{"current_steps": 3780, "total_steps": 37885, "loss": 0.1233, "lr": 1.9947215624175244e-06, "epoch": 0.4988781839778276, "percentage": 9.98, "elapsed_time": "0:04:50", "remaining_time": "0:43:41", "throughput": 6398.24, "total_tokens": 1858944} +{"current_steps": 3785, "total_steps": 37885, "loss": 0.1662, "lr": 1.997360781208762e-06, "epoch": 0.49953807575557607, "percentage": 9.99, "elapsed_time": "0:04:50", "remaining_time": "0:43:40", "throughput": 6400.42, "total_tokens": 1861696} +{"current_steps": 3790, "total_steps": 37885, "loss": 0.3196, "lr": 2e-06, "epoch": 0.5001979675333246, "percentage": 10.0, "elapsed_time": "0:04:51", "remaining_time": "0:43:39", "throughput": 6401.59, "total_tokens": 1864128} +{"current_steps": 3790, "total_steps": 37885, "eval_loss": 0.15765729546546936, "epoch": 0.5001979675333246, "percentage": 10.0, "elapsed_time": "0:04:59", "remaining_time": "0:44:49", "throughput": 6234.32, "total_tokens": 1864128} +{"current_steps": 3795, "total_steps": 37885, "loss": 0.0058, "lr": 1.9999998938786208e-06, "epoch": 0.500857859311073, "percentage": 10.02, "elapsed_time": "0:05:33", "remaining_time": "0:49:53", "throughput": 5600.48, "total_tokens": 1866432} +{"current_steps": 3800, "total_steps": 37885, "loss": 0.0722, "lr": 1.9999995755145053e-06, "epoch": 0.5015177510888215, "percentage": 10.03, "elapsed_time": "0:05:33", "remaining_time": "0:49:52", "throughput": 5601.92, "total_tokens": 1868736} +{"current_steps": 3805, "total_steps": 37885, "loss": 0.1512, "lr": 1.9999990449077214e-06, "epoch": 0.5021776428665699, "percentage": 10.04, "elapsed_time": "0:05:33", "remaining_time": "0:49:50", "throughput": 5604.2, "total_tokens": 1871360} +{"current_steps": 3810, "total_steps": 37885, "loss": 0.0042, "lr": 1.999998302058382e-06, "epoch": 0.5028375346443184, "percentage": 10.06, "elapsed_time": "0:05:34", "remaining_time": "0:49:49", "throughput": 5605.24, "total_tokens": 1873536} +{"current_steps": 3815, "total_steps": 37885, "loss": 0.2111, "lr": 1.999997346966644e-06, "epoch": 0.5034974264220667, "percentage": 10.07, "elapsed_time": "0:05:34", "remaining_time": "0:49:47", "throughput": 5606.79, "total_tokens": 1875904} +{"current_steps": 3820, "total_steps": 37885, "loss": 0.2222, "lr": 1.999996179632711e-06, "epoch": 0.5041573181998152, "percentage": 10.08, "elapsed_time": "0:05:34", "remaining_time": "0:49:46", "throughput": 5608.88, "total_tokens": 1878464} +{"current_steps": 3825, "total_steps": 37885, "loss": 0.1967, "lr": 1.9999948000568297e-06, "epoch": 0.5048172099775636, "percentage": 10.1, "elapsed_time": "0:05:35", "remaining_time": "0:49:45", "throughput": 5609.91, "total_tokens": 1880640} +{"current_steps": 3830, "total_steps": 37885, "loss": 0.1649, "lr": 1.9999932082392934e-06, "epoch": 0.5054771017553121, "percentage": 10.11, "elapsed_time": "0:05:35", "remaining_time": "0:49:43", "throughput": 5611.21, "total_tokens": 1882944} +{"current_steps": 3835, "total_steps": 37885, "loss": 0.064, "lr": 1.9999914041804405e-06, "epoch": 0.5061369935330606, "percentage": 10.12, "elapsed_time": "0:05:35", "remaining_time": "0:49:42", "throughput": 5612.56, "total_tokens": 1885248} +{"current_steps": 3840, "total_steps": 37885, "loss": 0.1077, "lr": 1.9999893878806534e-06, "epoch": 0.506796885310809, "percentage": 10.14, "elapsed_time": "0:05:36", "remaining_time": "0:49:40", "throughput": 5614.81, "total_tokens": 1887872} +{"current_steps": 3845, "total_steps": 37885, "loss": 0.0414, "lr": 1.99998715934036e-06, "epoch": 0.5074567770885575, "percentage": 10.15, "elapsed_time": "0:05:36", "remaining_time": "0:49:39", "throughput": 5617.24, "total_tokens": 1890560} +{"current_steps": 3850, "total_steps": 37885, "loss": 0.1999, "lr": 1.999984718560033e-06, "epoch": 0.5081166688663059, "percentage": 10.16, "elapsed_time": "0:05:36", "remaining_time": "0:49:38", "throughput": 5619.04, "total_tokens": 1893056} +{"current_steps": 3855, "total_steps": 37885, "loss": 0.1416, "lr": 1.9999820655401914e-06, "epoch": 0.5087765606440544, "percentage": 10.18, "elapsed_time": "0:05:37", "remaining_time": "0:49:36", "throughput": 5620.36, "total_tokens": 1895360} +{"current_steps": 3860, "total_steps": 37885, "loss": 0.0954, "lr": 1.9999792002813973e-06, "epoch": 0.5094364524218028, "percentage": 10.19, "elapsed_time": "0:05:37", "remaining_time": "0:49:35", "throughput": 5621.69, "total_tokens": 1897664} +{"current_steps": 3865, "total_steps": 37885, "loss": 0.27, "lr": 1.9999761227842592e-06, "epoch": 0.5100963441995513, "percentage": 10.2, "elapsed_time": "0:05:37", "remaining_time": "0:49:34", "throughput": 5623.97, "total_tokens": 1900288} +{"current_steps": 3870, "total_steps": 37885, "loss": 0.0797, "lr": 1.9999728330494307e-06, "epoch": 0.5107562359772997, "percentage": 10.22, "elapsed_time": "0:05:38", "remaining_time": "0:49:32", "throughput": 5626.53, "total_tokens": 1903040} +{"current_steps": 3875, "total_steps": 37885, "loss": 0.1566, "lr": 1.9999693310776095e-06, "epoch": 0.5114161277550482, "percentage": 10.23, "elapsed_time": "0:05:38", "remaining_time": "0:49:31", "throughput": 5628.21, "total_tokens": 1905472} +{"current_steps": 3880, "total_steps": 37885, "loss": 0.0922, "lr": 1.9999656168695387e-06, "epoch": 0.5120760195327966, "percentage": 10.24, "elapsed_time": "0:05:38", "remaining_time": "0:49:30", "throughput": 5629.36, "total_tokens": 1907712} +{"current_steps": 3885, "total_steps": 37885, "loss": 0.001, "lr": 1.9999616904260072e-06, "epoch": 0.5127359113105451, "percentage": 10.25, "elapsed_time": "0:05:39", "remaining_time": "0:49:28", "throughput": 5630.89, "total_tokens": 1910080} +{"current_steps": 3890, "total_steps": 37885, "loss": 0.2619, "lr": 1.9999575517478477e-06, "epoch": 0.5133958030882935, "percentage": 10.27, "elapsed_time": "0:05:39", "remaining_time": "0:49:27", "throughput": 5632.58, "total_tokens": 1912512} +{"current_steps": 3895, "total_steps": 37885, "loss": 0.1074, "lr": 1.9999532008359393e-06, "epoch": 0.514055694866042, "percentage": 10.28, "elapsed_time": "0:05:39", "remaining_time": "0:49:25", "throughput": 5633.76, "total_tokens": 1914752} +{"current_steps": 3900, "total_steps": 37885, "loss": 0.002, "lr": 1.999948637691205e-06, "epoch": 0.5147155866437905, "percentage": 10.29, "elapsed_time": "0:05:40", "remaining_time": "0:49:24", "throughput": 5635.27, "total_tokens": 1917120} +{"current_steps": 3905, "total_steps": 37885, "loss": 0.0127, "lr": 1.9999438623146132e-06, "epoch": 0.5153754784215389, "percentage": 10.31, "elapsed_time": "0:05:40", "remaining_time": "0:49:23", "throughput": 5636.06, "total_tokens": 1919232} +{"current_steps": 3910, "total_steps": 37885, "loss": 0.0941, "lr": 1.999938874707178e-06, "epoch": 0.5160353701992874, "percentage": 10.32, "elapsed_time": "0:05:40", "remaining_time": "0:49:21", "throughput": 5638.23, "total_tokens": 1921856} +{"current_steps": 3915, "total_steps": 37885, "loss": 0.1961, "lr": 1.9999336748699576e-06, "epoch": 0.5166952619770357, "percentage": 10.33, "elapsed_time": "0:05:41", "remaining_time": "0:49:20", "throughput": 5639.71, "total_tokens": 1924224} +{"current_steps": 3920, "total_steps": 37885, "loss": 0.0313, "lr": 1.9999282628040553e-06, "epoch": 0.5173551537547842, "percentage": 10.35, "elapsed_time": "0:05:41", "remaining_time": "0:49:19", "throughput": 5640.86, "total_tokens": 1926464} +{"current_steps": 3925, "total_steps": 37885, "loss": 0.3644, "lr": 1.9999226385106205e-06, "epoch": 0.5180150455325326, "percentage": 10.36, "elapsed_time": "0:05:41", "remaining_time": "0:49:17", "throughput": 5642.58, "total_tokens": 1928896} +{"current_steps": 3930, "total_steps": 37885, "loss": 0.1085, "lr": 1.9999168019908464e-06, "epoch": 0.5186749373102811, "percentage": 10.37, "elapsed_time": "0:05:42", "remaining_time": "0:49:16", "throughput": 5643.9, "total_tokens": 1931200} +{"current_steps": 3935, "total_steps": 37885, "loss": 0.1389, "lr": 1.9999107532459716e-06, "epoch": 0.5193348290880295, "percentage": 10.39, "elapsed_time": "0:05:42", "remaining_time": "0:49:15", "throughput": 5645.61, "total_tokens": 1933632} +{"current_steps": 3940, "total_steps": 37885, "loss": 0.0024, "lr": 1.9999044922772808e-06, "epoch": 0.519994720865778, "percentage": 10.4, "elapsed_time": "0:05:42", "remaining_time": "0:49:13", "throughput": 5646.64, "total_tokens": 1935872} +{"current_steps": 3945, "total_steps": 37885, "loss": 0.0619, "lr": 1.999898019086102e-06, "epoch": 0.5206546126435264, "percentage": 10.41, "elapsed_time": "0:05:43", "remaining_time": "0:49:12", "throughput": 5647.59, "total_tokens": 1938048} +{"current_steps": 3950, "total_steps": 37885, "loss": 0.0606, "lr": 1.999891333673809e-06, "epoch": 0.5213145044212749, "percentage": 10.43, "elapsed_time": "0:05:43", "remaining_time": "0:49:11", "throughput": 5649.6, "total_tokens": 1940608} +{"current_steps": 3955, "total_steps": 37885, "loss": 0.3859, "lr": 1.999884436041822e-06, "epoch": 0.5219743961990233, "percentage": 10.44, "elapsed_time": "0:05:43", "remaining_time": "0:49:09", "throughput": 5651.24, "total_tokens": 1943040} +{"current_steps": 3960, "total_steps": 37885, "loss": 0.1834, "lr": 1.999877326191603e-06, "epoch": 0.5226342879767718, "percentage": 10.45, "elapsed_time": "0:05:44", "remaining_time": "0:49:08", "throughput": 5653.41, "total_tokens": 1945664} +{"current_steps": 3965, "total_steps": 37885, "loss": 0.0053, "lr": 1.9998700041246626e-06, "epoch": 0.5232941797545203, "percentage": 10.47, "elapsed_time": "0:05:44", "remaining_time": "0:49:07", "throughput": 5654.88, "total_tokens": 1948032} +{"current_steps": 3970, "total_steps": 37885, "loss": 0.0008, "lr": 1.9998624698425545e-06, "epoch": 0.5239540715322687, "percentage": 10.48, "elapsed_time": "0:05:44", "remaining_time": "0:49:05", "throughput": 5655.8, "total_tokens": 1950208} +{"current_steps": 3975, "total_steps": 37885, "loss": 0.1693, "lr": 1.999854723346877e-06, "epoch": 0.5246139633100172, "percentage": 10.49, "elapsed_time": "0:05:45", "remaining_time": "0:49:04", "throughput": 5658.07, "total_tokens": 1952896} +{"current_steps": 3980, "total_steps": 37885, "loss": 0.0881, "lr": 1.999846764639275e-06, "epoch": 0.5252738550877656, "percentage": 10.51, "elapsed_time": "0:05:45", "remaining_time": "0:49:03", "throughput": 5659.34, "total_tokens": 1955200} +{"current_steps": 3985, "total_steps": 37885, "loss": 0.0753, "lr": 1.999838593721438e-06, "epoch": 0.5259337468655141, "percentage": 10.52, "elapsed_time": "0:05:45", "remaining_time": "0:49:01", "throughput": 5660.23, "total_tokens": 1957376} +{"current_steps": 3990, "total_steps": 37885, "loss": 0.2321, "lr": 1.999830210595099e-06, "epoch": 0.5265936386432625, "percentage": 10.53, "elapsed_time": "0:05:46", "remaining_time": "0:49:00", "throughput": 5662.49, "total_tokens": 1960064} +{"current_steps": 3995, "total_steps": 37885, "loss": 0.1365, "lr": 1.999821615262039e-06, "epoch": 0.527253530421011, "percentage": 10.55, "elapsed_time": "0:05:46", "remaining_time": "0:48:59", "throughput": 5664.47, "total_tokens": 1962624} +{"current_steps": 4000, "total_steps": 37885, "loss": 0.0799, "lr": 1.9998128077240805e-06, "epoch": 0.5279134221987594, "percentage": 10.56, "elapsed_time": "0:05:46", "remaining_time": "0:48:57", "throughput": 5666.12, "total_tokens": 1965056} +{"current_steps": 4005, "total_steps": 37885, "loss": 0.137, "lr": 1.9998037879830937e-06, "epoch": 0.5285733139765079, "percentage": 10.57, "elapsed_time": "0:05:47", "remaining_time": "0:48:56", "throughput": 5667.62, "total_tokens": 1967424} +{"current_steps": 4010, "total_steps": 37885, "loss": 0.2433, "lr": 1.999794556040993e-06, "epoch": 0.5292332057542563, "percentage": 10.58, "elapsed_time": "0:05:47", "remaining_time": "0:48:55", "throughput": 5669.27, "total_tokens": 1969856} +{"current_steps": 4015, "total_steps": 37885, "loss": 0.0609, "lr": 1.999785111899738e-06, "epoch": 0.5298930975320048, "percentage": 10.6, "elapsed_time": "0:05:47", "remaining_time": "0:48:53", "throughput": 5671.56, "total_tokens": 1972544} +{"current_steps": 4020, "total_steps": 37885, "loss": 0.0675, "lr": 1.9997754555613324e-06, "epoch": 0.5305529893097533, "percentage": 10.61, "elapsed_time": "0:05:48", "remaining_time": "0:48:52", "throughput": 5672.99, "total_tokens": 1974912} +{"current_steps": 4025, "total_steps": 37885, "loss": 0.2871, "lr": 1.999765587027827e-06, "epoch": 0.5312128810875016, "percentage": 10.62, "elapsed_time": "0:05:48", "remaining_time": "0:48:51", "throughput": 5674.25, "total_tokens": 1977216} +{"current_steps": 4030, "total_steps": 37885, "loss": 0.2346, "lr": 1.9997555063013147e-06, "epoch": 0.5318727728652501, "percentage": 10.64, "elapsed_time": "0:05:48", "remaining_time": "0:48:50", "throughput": 5675.88, "total_tokens": 1979648} +{"current_steps": 4035, "total_steps": 37885, "loss": 0.013, "lr": 1.999745213383936e-06, "epoch": 0.5325326646429985, "percentage": 10.65, "elapsed_time": "0:05:49", "remaining_time": "0:48:48", "throughput": 5676.97, "total_tokens": 1981888} +{"current_steps": 4040, "total_steps": 37885, "loss": 0.1859, "lr": 1.9997347082778753e-06, "epoch": 0.533192556420747, "percentage": 10.66, "elapsed_time": "0:05:49", "remaining_time": "0:48:47", "throughput": 5678.93, "total_tokens": 1984448} +{"current_steps": 4045, "total_steps": 37885, "loss": 0.3186, "lr": 1.999723990985363e-06, "epoch": 0.5338524481984954, "percentage": 10.68, "elapsed_time": "0:05:49", "remaining_time": "0:48:46", "throughput": 5681.02, "total_tokens": 1987072} +{"current_steps": 4050, "total_steps": 37885, "loss": 0.1181, "lr": 1.999713061508672e-06, "epoch": 0.5345123399762439, "percentage": 10.69, "elapsed_time": "0:05:50", "remaining_time": "0:48:44", "throughput": 5682.95, "total_tokens": 1989632} +{"current_steps": 4055, "total_steps": 37885, "loss": 0.0538, "lr": 1.9997019198501233e-06, "epoch": 0.5351722317539923, "percentage": 10.7, "elapsed_time": "0:05:50", "remaining_time": "0:48:43", "throughput": 5684.94, "total_tokens": 1992192} +{"current_steps": 4060, "total_steps": 37885, "loss": 0.265, "lr": 1.999690566012082e-06, "epoch": 0.5358321235317408, "percentage": 10.72, "elapsed_time": "0:05:50", "remaining_time": "0:48:42", "throughput": 5686.54, "total_tokens": 1994624} +{"current_steps": 4065, "total_steps": 37885, "loss": 0.0607, "lr": 1.9996789999969568e-06, "epoch": 0.5364920153094892, "percentage": 10.73, "elapsed_time": "0:05:51", "remaining_time": "0:48:41", "throughput": 5688.09, "total_tokens": 1997056} +{"current_steps": 4070, "total_steps": 37885, "loss": 0.134, "lr": 1.999667221807203e-06, "epoch": 0.5371519070872377, "percentage": 10.74, "elapsed_time": "0:05:51", "remaining_time": "0:48:39", "throughput": 5689.21, "total_tokens": 1999360} +{"current_steps": 4075, "total_steps": 37885, "loss": 0.082, "lr": 1.9996552314453204e-06, "epoch": 0.5378117988649861, "percentage": 10.76, "elapsed_time": "0:05:51", "remaining_time": "0:48:38", "throughput": 5690.86, "total_tokens": 2001856} +{"current_steps": 4080, "total_steps": 37885, "loss": 0.1956, "lr": 1.999643028913854e-06, "epoch": 0.5384716906427346, "percentage": 10.77, "elapsed_time": "0:05:52", "remaining_time": "0:48:37", "throughput": 5692.43, "total_tokens": 2004288} +{"current_steps": 4085, "total_steps": 37885, "loss": 0.1135, "lr": 1.9996306142153935e-06, "epoch": 0.5391315824204831, "percentage": 10.78, "elapsed_time": "0:05:52", "remaining_time": "0:48:36", "throughput": 5694.66, "total_tokens": 2006976} +{"current_steps": 4090, "total_steps": 37885, "loss": 0.0367, "lr": 1.9996179873525737e-06, "epoch": 0.5397914741982315, "percentage": 10.8, "elapsed_time": "0:05:52", "remaining_time": "0:48:34", "throughput": 5695.81, "total_tokens": 2009280} +{"current_steps": 4095, "total_steps": 37885, "loss": 0.2253, "lr": 1.9996051483280744e-06, "epoch": 0.54045136597598, "percentage": 10.81, "elapsed_time": "0:05:53", "remaining_time": "0:48:33", "throughput": 5697.54, "total_tokens": 2011776} +{"current_steps": 4100, "total_steps": 37885, "loss": 0.4021, "lr": 1.9995920971446215e-06, "epoch": 0.5411112577537284, "percentage": 10.82, "elapsed_time": "0:05:53", "remaining_time": "0:48:32", "throughput": 5699.35, "total_tokens": 2014336} +{"current_steps": 4105, "total_steps": 37885, "loss": 0.1495, "lr": 1.9995788338049846e-06, "epoch": 0.5417711495314769, "percentage": 10.84, "elapsed_time": "0:05:53", "remaining_time": "0:48:31", "throughput": 5701.76, "total_tokens": 2017152} +{"current_steps": 4110, "total_steps": 37885, "loss": 0.1161, "lr": 1.999565358311978e-06, "epoch": 0.5424310413092253, "percentage": 10.85, "elapsed_time": "0:05:54", "remaining_time": "0:48:29", "throughput": 5703.14, "total_tokens": 2019520} +{"current_steps": 4115, "total_steps": 37885, "loss": 0.1655, "lr": 1.999551670668463e-06, "epoch": 0.5430909330869738, "percentage": 10.86, "elapsed_time": "0:05:54", "remaining_time": "0:48:28", "throughput": 5703.69, "total_tokens": 2021632} +{"current_steps": 4120, "total_steps": 37885, "loss": 0.1365, "lr": 1.9995377708773437e-06, "epoch": 0.5437508248647221, "percentage": 10.88, "elapsed_time": "0:05:54", "remaining_time": "0:48:27", "throughput": 5704.25, "total_tokens": 2023744} +{"current_steps": 4125, "total_steps": 37885, "loss": 0.0072, "lr": 1.999523658941571e-06, "epoch": 0.5444107166424706, "percentage": 10.89, "elapsed_time": "0:05:55", "remaining_time": "0:48:26", "throughput": 5705.48, "total_tokens": 2026048} +{"current_steps": 4130, "total_steps": 37885, "loss": 0.0905, "lr": 1.999509334864139e-06, "epoch": 0.545070608420219, "percentage": 10.9, "elapsed_time": "0:05:55", "remaining_time": "0:48:25", "throughput": 5706.69, "total_tokens": 2028352} +{"current_steps": 4135, "total_steps": 37885, "loss": 0.0448, "lr": 1.999494798648089e-06, "epoch": 0.5457305001979675, "percentage": 10.91, "elapsed_time": "0:05:55", "remaining_time": "0:48:23", "throughput": 5708.34, "total_tokens": 2030848} +{"current_steps": 4140, "total_steps": 37885, "loss": 0.1427, "lr": 1.9994800502965055e-06, "epoch": 0.5463903919757159, "percentage": 10.93, "elapsed_time": "0:05:56", "remaining_time": "0:48:22", "throughput": 5709.93, "total_tokens": 2033344} +{"current_steps": 4145, "total_steps": 37885, "loss": 0.0661, "lr": 1.9994650898125193e-06, "epoch": 0.5470502837534644, "percentage": 10.94, "elapsed_time": "0:05:56", "remaining_time": "0:48:21", "throughput": 5710.87, "total_tokens": 2035584} +{"current_steps": 4150, "total_steps": 37885, "loss": 0.0643, "lr": 1.9994499171993056e-06, "epoch": 0.5477101755312129, "percentage": 10.95, "elapsed_time": "0:05:56", "remaining_time": "0:48:20", "throughput": 5712.94, "total_tokens": 2038272} +{"current_steps": 4155, "total_steps": 37885, "loss": 0.1879, "lr": 1.999434532460084e-06, "epoch": 0.5483700673089613, "percentage": 10.97, "elapsed_time": "0:05:57", "remaining_time": "0:48:19", "throughput": 5714.44, "total_tokens": 2040768} +{"current_steps": 4160, "total_steps": 37885, "loss": 0.1577, "lr": 1.99941893559812e-06, "epoch": 0.5490299590867098, "percentage": 10.98, "elapsed_time": "0:05:57", "remaining_time": "0:48:17", "throughput": 5715.52, "total_tokens": 2043072} +{"current_steps": 4165, "total_steps": 37885, "loss": 0.1324, "lr": 1.9994031266167247e-06, "epoch": 0.5496898508644582, "percentage": 10.99, "elapsed_time": "0:05:57", "remaining_time": "0:48:16", "throughput": 5717.84, "total_tokens": 2045824} +{"current_steps": 4170, "total_steps": 37885, "loss": 0.366, "lr": 1.999387105519253e-06, "epoch": 0.5503497426422067, "percentage": 11.01, "elapsed_time": "0:05:58", "remaining_time": "0:48:15", "throughput": 5718.8, "total_tokens": 2048064} +{"current_steps": 4175, "total_steps": 37885, "loss": 0.0539, "lr": 1.9993708723091044e-06, "epoch": 0.5510096344199551, "percentage": 11.02, "elapsed_time": "0:05:58", "remaining_time": "0:48:14", "throughput": 5720.05, "total_tokens": 2050432} +{"current_steps": 4180, "total_steps": 37885, "loss": 0.0967, "lr": 1.9993544269897253e-06, "epoch": 0.5516695261977036, "percentage": 11.03, "elapsed_time": "0:05:58", "remaining_time": "0:48:13", "throughput": 5721.74, "total_tokens": 2052928} +{"current_steps": 4185, "total_steps": 37885, "loss": 0.1474, "lr": 1.999337769564606e-06, "epoch": 0.552329417975452, "percentage": 11.05, "elapsed_time": "0:05:59", "remaining_time": "0:48:11", "throughput": 5723.35, "total_tokens": 2055424} +{"current_steps": 4190, "total_steps": 37885, "loss": 0.121, "lr": 1.9993209000372814e-06, "epoch": 0.5529893097532005, "percentage": 11.06, "elapsed_time": "0:05:59", "remaining_time": "0:48:10", "throughput": 5723.93, "total_tokens": 2057536} +{"current_steps": 4195, "total_steps": 37885, "loss": 0.2545, "lr": 1.9993038184113325e-06, "epoch": 0.5536492015309489, "percentage": 11.07, "elapsed_time": "0:05:59", "remaining_time": "0:48:09", "throughput": 5725.03, "total_tokens": 2059840} +{"current_steps": 4200, "total_steps": 37885, "loss": 0.2414, "lr": 1.999286524690385e-06, "epoch": 0.5543090933086974, "percentage": 11.09, "elapsed_time": "0:06:00", "remaining_time": "0:48:08", "throughput": 5727.38, "total_tokens": 2062656} +{"current_steps": 4205, "total_steps": 37885, "loss": 0.2045, "lr": 1.999269018878108e-06, "epoch": 0.5549689850864459, "percentage": 11.1, "elapsed_time": "0:06:00", "remaining_time": "0:48:07", "throughput": 5728.46, "total_tokens": 2064960} +{"current_steps": 4210, "total_steps": 37885, "loss": 0.233, "lr": 1.999251300978219e-06, "epoch": 0.5556288768641943, "percentage": 11.11, "elapsed_time": "0:06:00", "remaining_time": "0:48:06", "throughput": 5728.87, "total_tokens": 2067008} +{"current_steps": 4215, "total_steps": 37885, "loss": 0.0333, "lr": 1.9992333709944764e-06, "epoch": 0.5562887686419428, "percentage": 11.13, "elapsed_time": "0:06:01", "remaining_time": "0:48:04", "throughput": 5730.95, "total_tokens": 2069696} +{"current_steps": 4220, "total_steps": 37885, "loss": 0.0229, "lr": 1.9992152289306872e-06, "epoch": 0.5569486604196912, "percentage": 11.14, "elapsed_time": "0:06:01", "remaining_time": "0:48:03", "throughput": 5732.82, "total_tokens": 2072320} +{"current_steps": 4225, "total_steps": 37885, "loss": 0.005, "lr": 1.999196874790701e-06, "epoch": 0.5576085521974397, "percentage": 11.15, "elapsed_time": "0:06:01", "remaining_time": "0:48:02", "throughput": 5734.33, "total_tokens": 2074752} +{"current_steps": 4230, "total_steps": 37885, "loss": 0.1422, "lr": 1.999178308578414e-06, "epoch": 0.558268443975188, "percentage": 11.17, "elapsed_time": "0:06:02", "remaining_time": "0:48:01", "throughput": 5736.36, "total_tokens": 2077440} +{"current_steps": 4235, "total_steps": 37885, "loss": 0.2342, "lr": 1.9991595302977666e-06, "epoch": 0.5589283357529365, "percentage": 11.18, "elapsed_time": "0:06:02", "remaining_time": "0:48:00", "throughput": 5738.07, "total_tokens": 2080000} +{"current_steps": 4240, "total_steps": 37885, "loss": 0.196, "lr": 1.9991405399527438e-06, "epoch": 0.5595882275306849, "percentage": 11.19, "elapsed_time": "0:06:02", "remaining_time": "0:47:59", "throughput": 5739.78, "total_tokens": 2082560} +{"current_steps": 4245, "total_steps": 37885, "loss": 0.1375, "lr": 1.999121337547377e-06, "epoch": 0.5602481193084334, "percentage": 11.2, "elapsed_time": "0:06:03", "remaining_time": "0:47:57", "throughput": 5740.89, "total_tokens": 2084864} +{"current_steps": 4250, "total_steps": 37885, "loss": 0.097, "lr": 1.9991019230857413e-06, "epoch": 0.5609080110861818, "percentage": 11.22, "elapsed_time": "0:06:03", "remaining_time": "0:47:56", "throughput": 5742.57, "total_tokens": 2087424} +{"current_steps": 4255, "total_steps": 37885, "loss": 0.1572, "lr": 1.999082296571957e-06, "epoch": 0.5615679028639303, "percentage": 11.23, "elapsed_time": "0:06:03", "remaining_time": "0:47:55", "throughput": 5744.42, "total_tokens": 2090048} +{"current_steps": 4260, "total_steps": 37885, "loss": 0.0845, "lr": 1.9990624580101907e-06, "epoch": 0.5622277946416787, "percentage": 11.24, "elapsed_time": "0:06:04", "remaining_time": "0:47:54", "throughput": 5745.61, "total_tokens": 2092416} +{"current_steps": 4265, "total_steps": 37885, "loss": 0.0568, "lr": 1.999042407404652e-06, "epoch": 0.5628876864194272, "percentage": 11.26, "elapsed_time": "0:06:04", "remaining_time": "0:47:53", "throughput": 5746.51, "total_tokens": 2094656} +{"current_steps": 4270, "total_steps": 37885, "loss": 0.0462, "lr": 1.999022144759597e-06, "epoch": 0.5635475781971757, "percentage": 11.27, "elapsed_time": "0:06:04", "remaining_time": "0:47:52", "throughput": 5747.78, "total_tokens": 2097024} +{"current_steps": 4275, "total_steps": 37885, "loss": 0.0643, "lr": 1.9990016700793257e-06, "epoch": 0.5642074699749241, "percentage": 11.28, "elapsed_time": "0:06:05", "remaining_time": "0:47:50", "throughput": 5749.03, "total_tokens": 2099392} +{"current_steps": 4280, "total_steps": 37885, "loss": 0.0911, "lr": 1.9989809833681845e-06, "epoch": 0.5648673617526726, "percentage": 11.3, "elapsed_time": "0:06:05", "remaining_time": "0:47:49", "throughput": 5750.88, "total_tokens": 2102016} +{"current_steps": 4285, "total_steps": 37885, "loss": 0.2019, "lr": 1.9989600846305634e-06, "epoch": 0.565527253530421, "percentage": 11.31, "elapsed_time": "0:06:05", "remaining_time": "0:47:48", "throughput": 5751.91, "total_tokens": 2104320} +{"current_steps": 4290, "total_steps": 37885, "loss": 0.1077, "lr": 1.9989389738708984e-06, "epoch": 0.5661871453081695, "percentage": 11.32, "elapsed_time": "0:06:06", "remaining_time": "0:47:47", "throughput": 5754.25, "total_tokens": 2107136} +{"current_steps": 4295, "total_steps": 37885, "loss": 0.0653, "lr": 1.9989176510936698e-06, "epoch": 0.5668470370859179, "percentage": 11.34, "elapsed_time": "0:06:06", "remaining_time": "0:47:46", "throughput": 5756.48, "total_tokens": 2109888} +{"current_steps": 4300, "total_steps": 37885, "loss": 0.1031, "lr": 1.9988961163034033e-06, "epoch": 0.5675069288636664, "percentage": 11.35, "elapsed_time": "0:06:06", "remaining_time": "0:47:45", "throughput": 5757.52, "total_tokens": 2112192} +{"current_steps": 4305, "total_steps": 37885, "loss": 0.1154, "lr": 1.9988743695046696e-06, "epoch": 0.5681668206414148, "percentage": 11.36, "elapsed_time": "0:06:07", "remaining_time": "0:47:44", "throughput": 5759.16, "total_tokens": 2114752} +{"current_steps": 4310, "total_steps": 37885, "loss": 0.0766, "lr": 1.9988524107020844e-06, "epoch": 0.5688267124191633, "percentage": 11.38, "elapsed_time": "0:06:07", "remaining_time": "0:47:43", "throughput": 5760.51, "total_tokens": 2117184} +{"current_steps": 4315, "total_steps": 37885, "loss": 0.1522, "lr": 1.9988302399003083e-06, "epoch": 0.5694866041969117, "percentage": 11.39, "elapsed_time": "0:06:07", "remaining_time": "0:47:41", "throughput": 5761.74, "total_tokens": 2119552} +{"current_steps": 4320, "total_steps": 37885, "loss": 0.1321, "lr": 1.9988078571040464e-06, "epoch": 0.5701464959746602, "percentage": 11.4, "elapsed_time": "0:06:08", "remaining_time": "0:47:40", "throughput": 5762.85, "total_tokens": 2121920} +{"current_steps": 4325, "total_steps": 37885, "loss": 0.2248, "lr": 1.99878526231805e-06, "epoch": 0.5708063877524086, "percentage": 11.42, "elapsed_time": "0:06:08", "remaining_time": "0:47:39", "throughput": 5763.56, "total_tokens": 2124096} +{"current_steps": 4330, "total_steps": 37885, "loss": 0.1275, "lr": 1.998762455547114e-06, "epoch": 0.571466279530157, "percentage": 11.43, "elapsed_time": "0:06:08", "remaining_time": "0:47:38", "throughput": 5764.85, "total_tokens": 2126528} +{"current_steps": 4335, "total_steps": 37885, "loss": 0.0773, "lr": 1.998739436796079e-06, "epoch": 0.5721261713079056, "percentage": 11.44, "elapsed_time": "0:06:09", "remaining_time": "0:47:37", "throughput": 5765.24, "total_tokens": 2128576} +{"current_steps": 4340, "total_steps": 37885, "loss": 0.1947, "lr": 1.9987162060698312e-06, "epoch": 0.5727860630856539, "percentage": 11.46, "elapsed_time": "0:06:09", "remaining_time": "0:47:36", "throughput": 5765.74, "total_tokens": 2130688} +{"current_steps": 4345, "total_steps": 37885, "loss": 0.2038, "lr": 1.9986927633733007e-06, "epoch": 0.5734459548634024, "percentage": 11.47, "elapsed_time": "0:06:09", "remaining_time": "0:47:35", "throughput": 5767.53, "total_tokens": 2133312} +{"current_steps": 4350, "total_steps": 37885, "loss": 0.1005, "lr": 1.9986691087114634e-06, "epoch": 0.5741058466411508, "percentage": 11.48, "elapsed_time": "0:06:10", "remaining_time": "0:47:34", "throughput": 5768.67, "total_tokens": 2135680} +{"current_steps": 4355, "total_steps": 37885, "loss": 0.1931, "lr": 1.9986452420893393e-06, "epoch": 0.5747657384188993, "percentage": 11.5, "elapsed_time": "0:06:10", "remaining_time": "0:47:32", "throughput": 5770.0, "total_tokens": 2138112} +{"current_steps": 4360, "total_steps": 37885, "loss": 0.3394, "lr": 1.998621163511994e-06, "epoch": 0.5754256301966477, "percentage": 11.51, "elapsed_time": "0:06:10", "remaining_time": "0:47:31", "throughput": 5770.81, "total_tokens": 2140352} +{"current_steps": 4365, "total_steps": 37885, "loss": 0.1082, "lr": 1.998596872984539e-06, "epoch": 0.5760855219743962, "percentage": 11.52, "elapsed_time": "0:06:11", "remaining_time": "0:47:30", "throughput": 5772.8, "total_tokens": 2143040} +{"current_steps": 4370, "total_steps": 37885, "loss": 0.0335, "lr": 1.998572370512128e-06, "epoch": 0.5767454137521446, "percentage": 11.53, "elapsed_time": "0:06:11", "remaining_time": "0:47:29", "throughput": 5773.66, "total_tokens": 2145280} +{"current_steps": 4375, "total_steps": 37885, "loss": 0.0719, "lr": 1.998547656099963e-06, "epoch": 0.5774053055298931, "percentage": 11.55, "elapsed_time": "0:06:11", "remaining_time": "0:47:28", "throughput": 5775.47, "total_tokens": 2147904} +{"current_steps": 4380, "total_steps": 37885, "loss": 0.0904, "lr": 1.9985227297532886e-06, "epoch": 0.5780651973076415, "percentage": 11.56, "elapsed_time": "0:06:12", "remaining_time": "0:47:27", "throughput": 5776.95, "total_tokens": 2150400} +{"current_steps": 4385, "total_steps": 37885, "loss": 0.1622, "lr": 1.9984975914773957e-06, "epoch": 0.57872508908539, "percentage": 11.57, "elapsed_time": "0:06:12", "remaining_time": "0:47:26", "throughput": 5778.9, "total_tokens": 2153088} +{"current_steps": 4390, "total_steps": 37885, "loss": 0.0055, "lr": 1.9984722412776197e-06, "epoch": 0.5793849808631385, "percentage": 11.59, "elapsed_time": "0:06:12", "remaining_time": "0:47:25", "throughput": 5780.92, "total_tokens": 2155776} +{"current_steps": 4395, "total_steps": 37885, "loss": 0.2532, "lr": 1.9984466791593407e-06, "epoch": 0.5800448726408869, "percentage": 11.6, "elapsed_time": "0:06:13", "remaining_time": "0:47:24", "throughput": 5782.67, "total_tokens": 2158400} +{"current_steps": 4400, "total_steps": 37885, "loss": 0.0378, "lr": 1.9984209051279843e-06, "epoch": 0.5807047644186354, "percentage": 11.61, "elapsed_time": "0:06:13", "remaining_time": "0:47:23", "throughput": 5783.61, "total_tokens": 2160704} +{"current_steps": 4405, "total_steps": 37885, "loss": 0.0141, "lr": 1.998394919189021e-06, "epoch": 0.5813646561963838, "percentage": 11.63, "elapsed_time": "0:06:13", "remaining_time": "0:47:22", "throughput": 5785.08, "total_tokens": 2163200} +{"current_steps": 4410, "total_steps": 37885, "loss": 0.137, "lr": 1.9983687213479655e-06, "epoch": 0.5820245479741323, "percentage": 11.64, "elapsed_time": "0:06:14", "remaining_time": "0:47:20", "throughput": 5785.69, "total_tokens": 2165376} +{"current_steps": 4415, "total_steps": 37885, "loss": 0.2062, "lr": 1.998342311610379e-06, "epoch": 0.5826844397518807, "percentage": 11.65, "elapsed_time": "0:06:14", "remaining_time": "0:47:19", "throughput": 5786.98, "total_tokens": 2167808} +{"current_steps": 4420, "total_steps": 37885, "loss": 0.225, "lr": 1.998315689981866e-06, "epoch": 0.5833443315296292, "percentage": 11.67, "elapsed_time": "0:06:14", "remaining_time": "0:47:18", "throughput": 5787.98, "total_tokens": 2170112} +{"current_steps": 4425, "total_steps": 37885, "loss": 0.0063, "lr": 1.998288856468077e-06, "epoch": 0.5840042233073776, "percentage": 11.68, "elapsed_time": "0:06:15", "remaining_time": "0:47:17", "throughput": 5789.13, "total_tokens": 2172480} +{"current_steps": 4430, "total_steps": 37885, "loss": 0.1578, "lr": 1.998261811074707e-06, "epoch": 0.5846641150851261, "percentage": 11.69, "elapsed_time": "0:06:15", "remaining_time": "0:47:16", "throughput": 5790.88, "total_tokens": 2175104} +{"current_steps": 4435, "total_steps": 37885, "loss": 0.0695, "lr": 1.998234553807497e-06, "epoch": 0.5853240068628744, "percentage": 11.71, "elapsed_time": "0:06:15", "remaining_time": "0:47:15", "throughput": 5791.61, "total_tokens": 2177280} +{"current_steps": 4440, "total_steps": 37885, "loss": 0.0882, "lr": 1.9982070846722312e-06, "epoch": 0.585983898640623, "percentage": 11.72, "elapsed_time": "0:06:16", "remaining_time": "0:47:14", "throughput": 5793.2, "total_tokens": 2179776} +{"current_steps": 4445, "total_steps": 37885, "loss": 0.1574, "lr": 1.9981794036747402e-06, "epoch": 0.5866437904183713, "percentage": 11.73, "elapsed_time": "0:06:16", "remaining_time": "0:47:13", "throughput": 5795.02, "total_tokens": 2182400} +{"current_steps": 4450, "total_steps": 37885, "loss": 0.0313, "lr": 1.998151510820899e-06, "epoch": 0.5873036821961198, "percentage": 11.75, "elapsed_time": "0:06:16", "remaining_time": "0:47:12", "throughput": 5797.07, "total_tokens": 2185088} +{"current_steps": 4455, "total_steps": 37885, "loss": 0.0876, "lr": 1.9981234061166275e-06, "epoch": 0.5879635739738683, "percentage": 11.76, "elapsed_time": "0:06:17", "remaining_time": "0:47:10", "throughput": 5799.13, "total_tokens": 2187776} +{"current_steps": 4460, "total_steps": 37885, "loss": 0.0183, "lr": 1.9980950895678914e-06, "epoch": 0.5886234657516167, "percentage": 11.77, "elapsed_time": "0:06:17", "remaining_time": "0:47:09", "throughput": 5799.99, "total_tokens": 2190016} +{"current_steps": 4465, "total_steps": 37885, "loss": 0.0676, "lr": 1.9980665611806998e-06, "epoch": 0.5892833575293652, "percentage": 11.79, "elapsed_time": "0:06:17", "remaining_time": "0:47:08", "throughput": 5801.06, "total_tokens": 2192320} +{"current_steps": 4470, "total_steps": 37885, "loss": 0.1909, "lr": 1.998037820961108e-06, "epoch": 0.5899432493071136, "percentage": 11.8, "elapsed_time": "0:06:18", "remaining_time": "0:47:07", "throughput": 5802.44, "total_tokens": 2194752} +{"current_steps": 4475, "total_steps": 37885, "loss": 0.1777, "lr": 1.9980088689152163e-06, "epoch": 0.5906031410848621, "percentage": 11.81, "elapsed_time": "0:06:18", "remaining_time": "0:47:06", "throughput": 5803.53, "total_tokens": 2197056} +{"current_steps": 4480, "total_steps": 37885, "loss": 0.092, "lr": 1.9979797050491687e-06, "epoch": 0.5912630328626105, "percentage": 11.83, "elapsed_time": "0:06:18", "remaining_time": "0:47:05", "throughput": 5804.45, "total_tokens": 2199296} +{"current_steps": 4485, "total_steps": 37885, "loss": 0.3187, "lr": 1.997950329369156e-06, "epoch": 0.591922924640359, "percentage": 11.84, "elapsed_time": "0:06:19", "remaining_time": "0:47:04", "throughput": 5805.69, "total_tokens": 2201664} +{"current_steps": 4490, "total_steps": 37885, "loss": 0.1067, "lr": 1.997920741881412e-06, "epoch": 0.5925828164181074, "percentage": 11.85, "elapsed_time": "0:06:19", "remaining_time": "0:47:03", "throughput": 5807.48, "total_tokens": 2204288} +{"current_steps": 4495, "total_steps": 37885, "loss": 0.1064, "lr": 1.997890942592217e-06, "epoch": 0.5932427081958559, "percentage": 11.86, "elapsed_time": "0:06:19", "remaining_time": "0:47:01", "throughput": 5808.32, "total_tokens": 2206528} +{"current_steps": 4500, "total_steps": 37885, "loss": 0.1168, "lr": 1.997860931507896e-06, "epoch": 0.5939025999736043, "percentage": 11.88, "elapsed_time": "0:06:20", "remaining_time": "0:47:00", "throughput": 5809.83, "total_tokens": 2209024} +{"current_steps": 4505, "total_steps": 37885, "loss": 0.0681, "lr": 1.997830708634818e-06, "epoch": 0.5945624917513528, "percentage": 11.89, "elapsed_time": "0:06:20", "remaining_time": "0:46:59", "throughput": 5811.48, "total_tokens": 2211584} +{"current_steps": 4510, "total_steps": 37885, "loss": 0.1479, "lr": 1.9978002739793977e-06, "epoch": 0.5952223835291012, "percentage": 11.9, "elapsed_time": "0:06:20", "remaining_time": "0:46:58", "throughput": 5812.66, "total_tokens": 2213952} +{"current_steps": 4515, "total_steps": 37885, "loss": 0.1002, "lr": 1.9977696275480945e-06, "epoch": 0.5958822753068497, "percentage": 11.92, "elapsed_time": "0:06:21", "remaining_time": "0:46:57", "throughput": 5813.52, "total_tokens": 2216192} +{"current_steps": 4520, "total_steps": 37885, "loss": 0.0057, "lr": 1.9977387693474134e-06, "epoch": 0.5965421670845982, "percentage": 11.93, "elapsed_time": "0:06:21", "remaining_time": "0:46:56", "throughput": 5814.97, "total_tokens": 2218688} +{"current_steps": 4525, "total_steps": 37885, "loss": 0.0011, "lr": 1.9977076993839037e-06, "epoch": 0.5972020588623466, "percentage": 11.94, "elapsed_time": "0:06:21", "remaining_time": "0:46:55", "throughput": 5815.79, "total_tokens": 2220928} +{"current_steps": 4530, "total_steps": 37885, "loss": 0.001, "lr": 1.9976764176641592e-06, "epoch": 0.5978619506400951, "percentage": 11.96, "elapsed_time": "0:06:22", "remaining_time": "0:46:54", "throughput": 5817.09, "total_tokens": 2223360} +{"current_steps": 4535, "total_steps": 37885, "loss": 0.1205, "lr": 1.99764492419482e-06, "epoch": 0.5985218424178435, "percentage": 11.97, "elapsed_time": "0:06:22", "remaining_time": "0:46:53", "throughput": 5818.46, "total_tokens": 2225792} +{"current_steps": 4540, "total_steps": 37885, "loss": 0.2427, "lr": 1.99761321898257e-06, "epoch": 0.599181734195592, "percentage": 11.98, "elapsed_time": "0:06:22", "remaining_time": "0:46:52", "throughput": 5819.95, "total_tokens": 2228288} +{"current_steps": 4545, "total_steps": 37885, "loss": 0.2698, "lr": 1.9975813020341387e-06, "epoch": 0.5998416259733403, "percentage": 12.0, "elapsed_time": "0:06:23", "remaining_time": "0:46:51", "throughput": 5821.57, "total_tokens": 2230848} +{"current_steps": 4550, "total_steps": 37885, "loss": 0.2384, "lr": 1.9975491733562997e-06, "epoch": 0.6005015177510888, "percentage": 12.01, "elapsed_time": "0:06:23", "remaining_time": "0:46:49", "throughput": 5823.35, "total_tokens": 2233472} +{"current_steps": 4555, "total_steps": 37885, "loss": 0.2191, "lr": 1.9975168329558725e-06, "epoch": 0.6011614095288372, "percentage": 12.02, "elapsed_time": "0:06:23", "remaining_time": "0:46:48", "throughput": 5825.19, "total_tokens": 2236096} +{"current_steps": 4560, "total_steps": 37885, "loss": 0.1075, "lr": 1.9974842808397206e-06, "epoch": 0.6018213013065857, "percentage": 12.04, "elapsed_time": "0:06:24", "remaining_time": "0:46:47", "throughput": 5826.97, "total_tokens": 2238720} +{"current_steps": 4565, "total_steps": 37885, "loss": 0.1344, "lr": 1.9974515170147533e-06, "epoch": 0.6024811930843341, "percentage": 12.05, "elapsed_time": "0:06:24", "remaining_time": "0:46:46", "throughput": 5828.44, "total_tokens": 2241216} +{"current_steps": 4570, "total_steps": 37885, "loss": 0.07, "lr": 1.997418541487925e-06, "epoch": 0.6031410848620826, "percentage": 12.06, "elapsed_time": "0:06:24", "remaining_time": "0:46:45", "throughput": 5829.81, "total_tokens": 2243648} +{"current_steps": 4575, "total_steps": 37885, "loss": 0.1225, "lr": 1.9973853542662336e-06, "epoch": 0.6038009766398311, "percentage": 12.08, "elapsed_time": "0:06:25", "remaining_time": "0:46:44", "throughput": 5831.13, "total_tokens": 2246080} +{"current_steps": 4580, "total_steps": 37885, "loss": 0.0581, "lr": 1.9973519553567233e-06, "epoch": 0.6044608684175795, "percentage": 12.09, "elapsed_time": "0:06:25", "remaining_time": "0:46:43", "throughput": 5831.82, "total_tokens": 2248256} +{"current_steps": 4585, "total_steps": 37885, "loss": 0.0503, "lr": 1.9973183447664826e-06, "epoch": 0.605120760195328, "percentage": 12.1, "elapsed_time": "0:06:25", "remaining_time": "0:46:42", "throughput": 5833.16, "total_tokens": 2250688} +{"current_steps": 4590, "total_steps": 37885, "loss": 0.2459, "lr": 1.9972845225026458e-06, "epoch": 0.6057806519730764, "percentage": 12.12, "elapsed_time": "0:06:26", "remaining_time": "0:46:41", "throughput": 5834.52, "total_tokens": 2253120} +{"current_steps": 4595, "total_steps": 37885, "loss": 0.2109, "lr": 1.99725048857239e-06, "epoch": 0.6064405437508249, "percentage": 12.13, "elapsed_time": "0:06:26", "remaining_time": "0:46:40", "throughput": 5835.4, "total_tokens": 2255360} +{"current_steps": 4600, "total_steps": 37885, "loss": 0.1823, "lr": 1.99721624298294e-06, "epoch": 0.6071004355285733, "percentage": 12.14, "elapsed_time": "0:06:26", "remaining_time": "0:46:39", "throughput": 5836.59, "total_tokens": 2257728} +{"current_steps": 4605, "total_steps": 37885, "loss": 0.0686, "lr": 1.997181785741564e-06, "epoch": 0.6077603273063218, "percentage": 12.16, "elapsed_time": "0:06:27", "remaining_time": "0:46:37", "throughput": 5838.08, "total_tokens": 2260224} +{"current_steps": 4610, "total_steps": 37885, "loss": 0.0076, "lr": 1.9971471168555746e-06, "epoch": 0.6084202190840702, "percentage": 12.17, "elapsed_time": "0:06:27", "remaining_time": "0:46:36", "throughput": 5840.03, "total_tokens": 2262912} +{"current_steps": 4615, "total_steps": 37885, "loss": 0.2299, "lr": 1.9971122363323307e-06, "epoch": 0.6090801108618187, "percentage": 12.18, "elapsed_time": "0:06:27", "remaining_time": "0:46:35", "throughput": 5840.92, "total_tokens": 2265152} +{"current_steps": 4620, "total_steps": 37885, "loss": 0.0823, "lr": 1.9970771441792347e-06, "epoch": 0.6097400026395671, "percentage": 12.19, "elapsed_time": "0:06:28", "remaining_time": "0:46:34", "throughput": 5843.11, "total_tokens": 2267968} +{"current_steps": 4625, "total_steps": 37885, "loss": 0.1992, "lr": 1.997041840403735e-06, "epoch": 0.6103998944173156, "percentage": 12.21, "elapsed_time": "0:06:28", "remaining_time": "0:46:33", "throughput": 5844.3, "total_tokens": 2270336} +{"current_steps": 4630, "total_steps": 37885, "loss": 0.2115, "lr": 1.997006325013325e-06, "epoch": 0.611059786195064, "percentage": 12.22, "elapsed_time": "0:06:28", "remaining_time": "0:46:32", "throughput": 5846.2, "total_tokens": 2273024} +{"current_steps": 4635, "total_steps": 37885, "loss": 0.0794, "lr": 1.9969705980155426e-06, "epoch": 0.6117196779728125, "percentage": 12.23, "elapsed_time": "0:06:29", "remaining_time": "0:46:31", "throughput": 5847.1, "total_tokens": 2275264} +{"current_steps": 4640, "total_steps": 37885, "loss": 0.0776, "lr": 1.99693465941797e-06, "epoch": 0.612379569750561, "percentage": 12.25, "elapsed_time": "0:06:29", "remaining_time": "0:46:30", "throughput": 5848.87, "total_tokens": 2277888} +{"current_steps": 4645, "total_steps": 37885, "loss": 0.002, "lr": 1.9968985092282354e-06, "epoch": 0.6130394615283093, "percentage": 12.26, "elapsed_time": "0:06:29", "remaining_time": "0:46:29", "throughput": 5850.18, "total_tokens": 2280320} +{"current_steps": 4650, "total_steps": 37885, "loss": 0.0637, "lr": 1.996862147454011e-06, "epoch": 0.6136993533060578, "percentage": 12.27, "elapsed_time": "0:06:30", "remaining_time": "0:46:28", "throughput": 5851.05, "total_tokens": 2282560} +{"current_steps": 4655, "total_steps": 37885, "loss": 0.121, "lr": 1.9968255741030144e-06, "epoch": 0.6143592450838062, "percentage": 12.29, "elapsed_time": "0:06:30", "remaining_time": "0:46:27", "throughput": 5852.05, "total_tokens": 2284864} +{"current_steps": 4660, "total_steps": 37885, "loss": 0.1595, "lr": 1.9967887891830082e-06, "epoch": 0.6150191368615547, "percentage": 12.3, "elapsed_time": "0:06:30", "remaining_time": "0:46:26", "throughput": 5853.06, "total_tokens": 2287168} +{"current_steps": 4665, "total_steps": 37885, "loss": 0.0561, "lr": 1.9967517927017995e-06, "epoch": 0.6156790286393031, "percentage": 12.31, "elapsed_time": "0:06:31", "remaining_time": "0:46:25", "throughput": 5854.33, "total_tokens": 2289600} +{"current_steps": 4670, "total_steps": 37885, "loss": 0.1015, "lr": 1.996714584667241e-06, "epoch": 0.6163389204170516, "percentage": 12.33, "elapsed_time": "0:06:31", "remaining_time": "0:46:23", "throughput": 5855.93, "total_tokens": 2292160} +{"current_steps": 4675, "total_steps": 37885, "loss": 0.0621, "lr": 1.9966771650872295e-06, "epoch": 0.6169988121948, "percentage": 12.34, "elapsed_time": "0:06:31", "remaining_time": "0:46:22", "throughput": 5857.96, "total_tokens": 2294912} +{"current_steps": 4680, "total_steps": 37885, "loss": 0.0647, "lr": 1.996639533969707e-06, "epoch": 0.6176587039725485, "percentage": 12.35, "elapsed_time": "0:06:32", "remaining_time": "0:46:21", "throughput": 5858.48, "total_tokens": 2297024} +{"current_steps": 4685, "total_steps": 37885, "loss": 0.2015, "lr": 1.9966016913226602e-06, "epoch": 0.6183185957502969, "percentage": 12.37, "elapsed_time": "0:06:32", "remaining_time": "0:46:20", "throughput": 5859.76, "total_tokens": 2299456} +{"current_steps": 4690, "total_steps": 37885, "loss": 0.062, "lr": 1.9965636371541217e-06, "epoch": 0.6189784875280454, "percentage": 12.38, "elapsed_time": "0:06:32", "remaining_time": "0:46:19", "throughput": 5860.28, "total_tokens": 2301568} +{"current_steps": 4695, "total_steps": 37885, "loss": 0.1759, "lr": 1.9965253714721676e-06, "epoch": 0.6196383793057938, "percentage": 12.39, "elapsed_time": "0:06:33", "remaining_time": "0:46:18", "throughput": 5861.44, "total_tokens": 2303936} +{"current_steps": 4700, "total_steps": 37885, "loss": 0.1793, "lr": 1.99648689428492e-06, "epoch": 0.6202982710835423, "percentage": 12.41, "elapsed_time": "0:06:33", "remaining_time": "0:46:17", "throughput": 5862.24, "total_tokens": 2306176} +{"current_steps": 4705, "total_steps": 37885, "loss": 0.046, "lr": 1.9964482056005446e-06, "epoch": 0.6209581628612908, "percentage": 12.42, "elapsed_time": "0:06:33", "remaining_time": "0:46:16", "throughput": 5863.81, "total_tokens": 2308736} +{"current_steps": 4710, "total_steps": 37885, "loss": 0.2129, "lr": 1.9964093054272534e-06, "epoch": 0.6216180546390392, "percentage": 12.43, "elapsed_time": "0:06:34", "remaining_time": "0:46:15", "throughput": 5864.91, "total_tokens": 2311104} +{"current_steps": 4715, "total_steps": 37885, "loss": 0.1223, "lr": 1.9963701937733024e-06, "epoch": 0.6222779464167877, "percentage": 12.45, "elapsed_time": "0:06:34", "remaining_time": "0:46:14", "throughput": 5866.16, "total_tokens": 2313536} +{"current_steps": 4720, "total_steps": 37885, "loss": 0.1751, "lr": 1.9963308706469932e-06, "epoch": 0.6229378381945361, "percentage": 12.46, "elapsed_time": "0:06:34", "remaining_time": "0:46:13", "throughput": 5867.57, "total_tokens": 2316032} +{"current_steps": 4725, "total_steps": 37885, "loss": 0.2008, "lr": 1.9962913360566713e-06, "epoch": 0.6235977299722846, "percentage": 12.47, "elapsed_time": "0:06:35", "remaining_time": "0:46:12", "throughput": 5869.23, "total_tokens": 2318656} +{"current_steps": 4730, "total_steps": 37885, "loss": 0.1295, "lr": 1.9962515900107283e-06, "epoch": 0.624257621750033, "percentage": 12.49, "elapsed_time": "0:06:35", "remaining_time": "0:46:11", "throughput": 5870.76, "total_tokens": 2321216} +{"current_steps": 4735, "total_steps": 37885, "loss": 0.1381, "lr": 1.9962116325175993e-06, "epoch": 0.6249175135277815, "percentage": 12.5, "elapsed_time": "0:06:35", "remaining_time": "0:46:10", "throughput": 5871.97, "total_tokens": 2323648} +{"current_steps": 4740, "total_steps": 37885, "loss": 0.1807, "lr": 1.996171463585765e-06, "epoch": 0.6255774053055299, "percentage": 12.51, "elapsed_time": "0:06:36", "remaining_time": "0:46:09", "throughput": 5873.19, "total_tokens": 2326080} +{"current_steps": 4745, "total_steps": 37885, "loss": 0.097, "lr": 1.996131083223752e-06, "epoch": 0.6262372970832784, "percentage": 12.52, "elapsed_time": "0:06:36", "remaining_time": "0:46:08", "throughput": 5874.43, "total_tokens": 2328512} +{"current_steps": 4750, "total_steps": 37885, "loss": 0.1358, "lr": 1.9960904914401295e-06, "epoch": 0.6268971888610267, "percentage": 12.54, "elapsed_time": "0:06:36", "remaining_time": "0:46:07", "throughput": 5875.79, "total_tokens": 2331008} +{"current_steps": 4755, "total_steps": 37885, "loss": 0.0575, "lr": 1.9960496882435138e-06, "epoch": 0.6275570806387752, "percentage": 12.55, "elapsed_time": "0:06:37", "remaining_time": "0:46:06", "throughput": 5876.9, "total_tokens": 2333376} +{"current_steps": 4760, "total_steps": 37885, "loss": 0.2401, "lr": 1.996008673642564e-06, "epoch": 0.6282169724165237, "percentage": 12.56, "elapsed_time": "0:06:37", "remaining_time": "0:46:05", "throughput": 5878.27, "total_tokens": 2335872} +{"current_steps": 4765, "total_steps": 37885, "loss": 0.0035, "lr": 1.995967447645986e-06, "epoch": 0.6288768641942721, "percentage": 12.58, "elapsed_time": "0:06:37", "remaining_time": "0:46:04", "throughput": 5879.82, "total_tokens": 2338432} +{"current_steps": 4770, "total_steps": 37885, "loss": 0.2603, "lr": 1.9959260102625293e-06, "epoch": 0.6295367559720206, "percentage": 12.59, "elapsed_time": "0:06:38", "remaining_time": "0:46:03", "throughput": 5881.18, "total_tokens": 2340928} +{"current_steps": 4775, "total_steps": 37885, "loss": 0.1541, "lr": 1.9958843615009892e-06, "epoch": 0.630196647749769, "percentage": 12.6, "elapsed_time": "0:06:38", "remaining_time": "0:46:02", "throughput": 5883.14, "total_tokens": 2343680} +{"current_steps": 4780, "total_steps": 37885, "loss": 0.062, "lr": 1.995842501370205e-06, "epoch": 0.6308565395275175, "percentage": 12.62, "elapsed_time": "0:06:38", "remaining_time": "0:46:01", "throughput": 5884.67, "total_tokens": 2346240} +{"current_steps": 4785, "total_steps": 37885, "loss": 0.1132, "lr": 1.9958004298790607e-06, "epoch": 0.6315164313052659, "percentage": 12.63, "elapsed_time": "0:06:39", "remaining_time": "0:46:00", "throughput": 5885.64, "total_tokens": 2348544} +{"current_steps": 4790, "total_steps": 37885, "loss": 0.3771, "lr": 1.9957581470364867e-06, "epoch": 0.6321763230830144, "percentage": 12.64, "elapsed_time": "0:06:39", "remaining_time": "0:45:59", "throughput": 5886.86, "total_tokens": 2350976} +{"current_steps": 4795, "total_steps": 37885, "loss": 0.1463, "lr": 1.9957156528514564e-06, "epoch": 0.6328362148607628, "percentage": 12.66, "elapsed_time": "0:06:39", "remaining_time": "0:45:58", "throughput": 5887.68, "total_tokens": 2353216} +{"current_steps": 4800, "total_steps": 37885, "loss": 0.1893, "lr": 1.995672947332989e-06, "epoch": 0.6334961066385113, "percentage": 12.67, "elapsed_time": "0:06:40", "remaining_time": "0:45:57", "throughput": 5888.73, "total_tokens": 2355584} +{"current_steps": 4805, "total_steps": 37885, "loss": 0.0834, "lr": 1.995630030490149e-06, "epoch": 0.6341559984162597, "percentage": 12.68, "elapsed_time": "0:06:40", "remaining_time": "0:45:56", "throughput": 5890.25, "total_tokens": 2358144} +{"current_steps": 4810, "total_steps": 37885, "loss": 0.0498, "lr": 1.9955869023320447e-06, "epoch": 0.6348158901940082, "percentage": 12.7, "elapsed_time": "0:06:40", "remaining_time": "0:45:55", "throughput": 5892.19, "total_tokens": 2360896} +{"current_steps": 4815, "total_steps": 37885, "loss": 0.2227, "lr": 1.99554356286783e-06, "epoch": 0.6354757819717566, "percentage": 12.71, "elapsed_time": "0:06:41", "remaining_time": "0:45:54", "throughput": 5893.27, "total_tokens": 2363264} +{"current_steps": 4820, "total_steps": 37885, "loss": 0.0659, "lr": 1.9955000121067035e-06, "epoch": 0.6361356737495051, "percentage": 12.72, "elapsed_time": "0:06:41", "remaining_time": "0:45:53", "throughput": 5894.37, "total_tokens": 2365632} +{"current_steps": 4825, "total_steps": 37885, "loss": 0.0047, "lr": 1.9954562500579075e-06, "epoch": 0.6367955655272536, "percentage": 12.74, "elapsed_time": "0:06:41", "remaining_time": "0:45:52", "throughput": 5895.46, "total_tokens": 2368000} +{"current_steps": 4830, "total_steps": 37885, "loss": 0.122, "lr": 1.9954122767307316e-06, "epoch": 0.637455457305002, "percentage": 12.75, "elapsed_time": "0:06:41", "remaining_time": "0:45:51", "throughput": 5897.0, "total_tokens": 2370560} +{"current_steps": 4835, "total_steps": 37885, "loss": 0.0527, "lr": 1.995368092134508e-06, "epoch": 0.6381153490827505, "percentage": 12.76, "elapsed_time": "0:06:42", "remaining_time": "0:45:50", "throughput": 5898.5, "total_tokens": 2373120} +{"current_steps": 4840, "total_steps": 37885, "loss": 0.004, "lr": 1.9953236962786143e-06, "epoch": 0.6387752408604989, "percentage": 12.78, "elapsed_time": "0:06:42", "remaining_time": "0:45:49", "throughput": 5900.46, "total_tokens": 2375872} +{"current_steps": 4845, "total_steps": 37885, "loss": 0.2211, "lr": 1.995279089172474e-06, "epoch": 0.6394351326382474, "percentage": 12.79, "elapsed_time": "0:06:42", "remaining_time": "0:45:48", "throughput": 5901.96, "total_tokens": 2378432} +{"current_steps": 4850, "total_steps": 37885, "loss": 0.1565, "lr": 1.9952342708255543e-06, "epoch": 0.6400950244159958, "percentage": 12.8, "elapsed_time": "0:06:43", "remaining_time": "0:45:47", "throughput": 5903.08, "total_tokens": 2380800} +{"current_steps": 4855, "total_steps": 37885, "loss": 0.1074, "lr": 1.9951892412473677e-06, "epoch": 0.6407549161937443, "percentage": 12.82, "elapsed_time": "0:06:43", "remaining_time": "0:45:46", "throughput": 5905.45, "total_tokens": 2383744} +{"current_steps": 4860, "total_steps": 37885, "loss": 0.124, "lr": 1.9951440004474707e-06, "epoch": 0.6414148079714926, "percentage": 12.83, "elapsed_time": "0:06:43", "remaining_time": "0:45:45", "throughput": 5906.56, "total_tokens": 2386112} +{"current_steps": 4865, "total_steps": 37885, "loss": 0.1265, "lr": 1.9950985484354664e-06, "epoch": 0.6420746997492411, "percentage": 12.84, "elapsed_time": "0:06:44", "remaining_time": "0:45:44", "throughput": 5908.24, "total_tokens": 2388736} +{"current_steps": 4870, "total_steps": 37885, "loss": 0.0334, "lr": 1.9950528852210013e-06, "epoch": 0.6427345915269895, "percentage": 12.85, "elapsed_time": "0:06:44", "remaining_time": "0:45:43", "throughput": 5909.32, "total_tokens": 2391104} +{"current_steps": 4875, "total_steps": 37885, "loss": 0.1468, "lr": 1.9950070108137663e-06, "epoch": 0.643394483304738, "percentage": 12.87, "elapsed_time": "0:06:44", "remaining_time": "0:45:42", "throughput": 5910.97, "total_tokens": 2393728} +{"current_steps": 4880, "total_steps": 37885, "loss": 0.1562, "lr": 1.9949609252234985e-06, "epoch": 0.6440543750824864, "percentage": 12.88, "elapsed_time": "0:06:45", "remaining_time": "0:45:41", "throughput": 5912.88, "total_tokens": 2396480} +{"current_steps": 4885, "total_steps": 37885, "loss": 0.1169, "lr": 1.9949146284599794e-06, "epoch": 0.6447142668602349, "percentage": 12.89, "elapsed_time": "0:06:45", "remaining_time": "0:45:40", "throughput": 5914.5, "total_tokens": 2399104} +{"current_steps": 4890, "total_steps": 37885, "loss": 0.096, "lr": 1.9948681205330354e-06, "epoch": 0.6453741586379834, "percentage": 12.91, "elapsed_time": "0:06:45", "remaining_time": "0:45:39", "throughput": 5916.02, "total_tokens": 2401664} +{"current_steps": 4895, "total_steps": 37885, "loss": 0.1063, "lr": 1.994821401452537e-06, "epoch": 0.6460340504157318, "percentage": 12.92, "elapsed_time": "0:06:46", "remaining_time": "0:45:38", "throughput": 5917.4, "total_tokens": 2404160} +{"current_steps": 4900, "total_steps": 37885, "loss": 0.0837, "lr": 1.9947744712283997e-06, "epoch": 0.6466939421934803, "percentage": 12.93, "elapsed_time": "0:06:46", "remaining_time": "0:45:37", "throughput": 5918.6, "total_tokens": 2406592} +{"current_steps": 4905, "total_steps": 37885, "loss": 0.0537, "lr": 1.9947273298705848e-06, "epoch": 0.6473538339712287, "percentage": 12.95, "elapsed_time": "0:06:46", "remaining_time": "0:45:36", "throughput": 5919.92, "total_tokens": 2409088} +{"current_steps": 4910, "total_steps": 37885, "loss": 0.173, "lr": 1.994679977389097e-06, "epoch": 0.6480137257489772, "percentage": 12.96, "elapsed_time": "0:06:47", "remaining_time": "0:45:35", "throughput": 5921.27, "total_tokens": 2411584} +{"current_steps": 4915, "total_steps": 37885, "loss": 0.3713, "lr": 1.9946324137939876e-06, "epoch": 0.6486736175267256, "percentage": 12.97, "elapsed_time": "0:06:47", "remaining_time": "0:45:34", "throughput": 5923.29, "total_tokens": 2414400} +{"current_steps": 4920, "total_steps": 37885, "loss": 0.22, "lr": 1.9945846390953503e-06, "epoch": 0.6493335093044741, "percentage": 12.99, "elapsed_time": "0:06:47", "remaining_time": "0:45:33", "throughput": 5924.03, "total_tokens": 2416640} +{"current_steps": 4925, "total_steps": 37885, "loss": 0.1468, "lr": 1.994536653303326e-06, "epoch": 0.6499934010822225, "percentage": 13.0, "elapsed_time": "0:06:48", "remaining_time": "0:45:32", "throughput": 5925.34, "total_tokens": 2419136} +{"current_steps": 4930, "total_steps": 37885, "loss": 0.0354, "lr": 1.9944884564280987e-06, "epoch": 0.650653292859971, "percentage": 13.01, "elapsed_time": "0:06:48", "remaining_time": "0:45:31", "throughput": 5926.25, "total_tokens": 2421440} +{"current_steps": 4935, "total_steps": 37885, "loss": 0.0854, "lr": 1.994440048479898e-06, "epoch": 0.6513131846377194, "percentage": 13.03, "elapsed_time": "0:06:48", "remaining_time": "0:45:30", "throughput": 5927.76, "total_tokens": 2424000} +{"current_steps": 4940, "total_steps": 37885, "loss": 0.0808, "lr": 1.9943914294689984e-06, "epoch": 0.6519730764154679, "percentage": 13.04, "elapsed_time": "0:06:49", "remaining_time": "0:45:29", "throughput": 5928.53, "total_tokens": 2426240} +{"current_steps": 4945, "total_steps": 37885, "loss": 0.0641, "lr": 1.9943425994057184e-06, "epoch": 0.6526329681932164, "percentage": 13.05, "elapsed_time": "0:06:49", "remaining_time": "0:45:28", "throughput": 5930.19, "total_tokens": 2428864} +{"current_steps": 4950, "total_steps": 37885, "loss": 0.0188, "lr": 1.994293558300422e-06, "epoch": 0.6532928599709648, "percentage": 13.07, "elapsed_time": "0:06:49", "remaining_time": "0:45:27", "throughput": 5931.41, "total_tokens": 2431296} +{"current_steps": 4955, "total_steps": 37885, "loss": 0.0037, "lr": 1.9942443061635183e-06, "epoch": 0.6539527517487133, "percentage": 13.08, "elapsed_time": "0:06:50", "remaining_time": "0:45:26", "throughput": 5933.08, "total_tokens": 2433984} +{"current_steps": 4960, "total_steps": 37885, "loss": 0.5298, "lr": 1.9941948430054603e-06, "epoch": 0.6546126435264616, "percentage": 13.09, "elapsed_time": "0:06:50", "remaining_time": "0:45:25", "throughput": 5933.79, "total_tokens": 2436224} +{"current_steps": 4965, "total_steps": 37885, "loss": 0.1602, "lr": 1.994145168836746e-06, "epoch": 0.6552725353042101, "percentage": 13.11, "elapsed_time": "0:06:50", "remaining_time": "0:45:24", "throughput": 5935.16, "total_tokens": 2438720} +{"current_steps": 4970, "total_steps": 37885, "loss": 0.1132, "lr": 1.994095283667919e-06, "epoch": 0.6559324270819585, "percentage": 13.12, "elapsed_time": "0:06:51", "remaining_time": "0:45:23", "throughput": 5935.87, "total_tokens": 2440960} +{"current_steps": 4975, "total_steps": 37885, "loss": 0.0129, "lr": 1.9940451875095666e-06, "epoch": 0.656592318859707, "percentage": 13.13, "elapsed_time": "0:06:51", "remaining_time": "0:45:22", "throughput": 5936.84, "total_tokens": 2443328} +{"current_steps": 4980, "total_steps": 37885, "loss": 0.1357, "lr": 1.9939948803723217e-06, "epoch": 0.6572522106374554, "percentage": 13.15, "elapsed_time": "0:06:51", "remaining_time": "0:45:21", "throughput": 5938.43, "total_tokens": 2445952} +{"current_steps": 4985, "total_steps": 37885, "loss": 0.0527, "lr": 1.9939443622668614e-06, "epoch": 0.6579121024152039, "percentage": 13.16, "elapsed_time": "0:06:52", "remaining_time": "0:45:20", "throughput": 5940.03, "total_tokens": 2448576} +{"current_steps": 4990, "total_steps": 37885, "loss": 0.3274, "lr": 1.9938936332039073e-06, "epoch": 0.6585719941929523, "percentage": 13.17, "elapsed_time": "0:06:52", "remaining_time": "0:45:19", "throughput": 5941.46, "total_tokens": 2451136} +{"current_steps": 4995, "total_steps": 37885, "loss": 0.0122, "lr": 1.993842693194227e-06, "epoch": 0.6592318859707008, "percentage": 13.18, "elapsed_time": "0:06:52", "remaining_time": "0:45:18", "throughput": 5942.78, "total_tokens": 2453632} +{"current_steps": 5000, "total_steps": 37885, "loss": 0.1135, "lr": 1.993791542248632e-06, "epoch": 0.6598917777484492, "percentage": 13.2, "elapsed_time": "0:06:53", "remaining_time": "0:45:17", "throughput": 5944.2, "total_tokens": 2456192} +{"current_steps": 5005, "total_steps": 37885, "loss": 0.0259, "lr": 1.9937401803779784e-06, "epoch": 0.6605516695261977, "percentage": 13.21, "elapsed_time": "0:06:53", "remaining_time": "0:45:16", "throughput": 5945.33, "total_tokens": 2458624} +{"current_steps": 5010, "total_steps": 37885, "loss": 0.0829, "lr": 1.9936886075931678e-06, "epoch": 0.6612115613039462, "percentage": 13.22, "elapsed_time": "0:06:53", "remaining_time": "0:45:15", "throughput": 5946.17, "total_tokens": 2460928} +{"current_steps": 5015, "total_steps": 37885, "loss": 0.0726, "lr": 1.993636823905146e-06, "epoch": 0.6618714530816946, "percentage": 13.24, "elapsed_time": "0:06:54", "remaining_time": "0:45:14", "throughput": 5947.73, "total_tokens": 2463552} +{"current_steps": 5020, "total_steps": 37885, "loss": 0.0881, "lr": 1.9935848293249034e-06, "epoch": 0.6625313448594431, "percentage": 13.25, "elapsed_time": "0:06:54", "remaining_time": "0:45:13", "throughput": 5948.62, "total_tokens": 2465856} +{"current_steps": 5025, "total_steps": 37885, "loss": 0.044, "lr": 1.9935326238634763e-06, "epoch": 0.6631912366371915, "percentage": 13.26, "elapsed_time": "0:06:54", "remaining_time": "0:45:12", "throughput": 5949.72, "total_tokens": 2468288} +{"current_steps": 5030, "total_steps": 37885, "loss": 0.3045, "lr": 1.993480207531944e-06, "epoch": 0.66385112841494, "percentage": 13.28, "elapsed_time": "0:06:55", "remaining_time": "0:45:11", "throughput": 5951.29, "total_tokens": 2470912} +{"current_steps": 5035, "total_steps": 37885, "loss": 0.1027, "lr": 1.9934275803414317e-06, "epoch": 0.6645110201926884, "percentage": 13.29, "elapsed_time": "0:06:55", "remaining_time": "0:45:10", "throughput": 5952.86, "total_tokens": 2473536} +{"current_steps": 5040, "total_steps": 37885, "loss": 0.0028, "lr": 1.99337474230311e-06, "epoch": 0.6651709119704369, "percentage": 13.3, "elapsed_time": "0:06:55", "remaining_time": "0:45:10", "throughput": 5954.15, "total_tokens": 2476032} +{"current_steps": 5045, "total_steps": 37885, "loss": 0.2344, "lr": 1.993321693428192e-06, "epoch": 0.6658308037481853, "percentage": 13.32, "elapsed_time": "0:06:56", "remaining_time": "0:45:09", "throughput": 5954.69, "total_tokens": 2478208} +{"current_steps": 5050, "total_steps": 37885, "loss": 0.1178, "lr": 1.9932684337279378e-06, "epoch": 0.6664906955259338, "percentage": 13.33, "elapsed_time": "0:06:56", "remaining_time": "0:45:08", "throughput": 5955.51, "total_tokens": 2480512} +{"current_steps": 5055, "total_steps": 37885, "loss": 0.2015, "lr": 1.9932149632136514e-06, "epoch": 0.6671505873036822, "percentage": 13.34, "elapsed_time": "0:06:56", "remaining_time": "0:45:07", "throughput": 5956.78, "total_tokens": 2483008} +{"current_steps": 5060, "total_steps": 37885, "loss": 0.2345, "lr": 1.9931612818966812e-06, "epoch": 0.6678104790814307, "percentage": 13.36, "elapsed_time": "0:06:57", "remaining_time": "0:45:06", "throughput": 5957.79, "total_tokens": 2485376} +{"current_steps": 5065, "total_steps": 37885, "loss": 0.0378, "lr": 1.993107389788421e-06, "epoch": 0.668470370859179, "percentage": 13.37, "elapsed_time": "0:06:57", "remaining_time": "0:45:05", "throughput": 5959.5, "total_tokens": 2488064} +{"current_steps": 5070, "total_steps": 37885, "loss": 0.0923, "lr": 1.9930532869003085e-06, "epoch": 0.6691302626369275, "percentage": 13.38, "elapsed_time": "0:06:57", "remaining_time": "0:45:04", "throughput": 5960.88, "total_tokens": 2490624} +{"current_steps": 5075, "total_steps": 37885, "loss": 0.4066, "lr": 1.992998973243827e-06, "epoch": 0.669790154414676, "percentage": 13.4, "elapsed_time": "0:06:58", "remaining_time": "0:45:03", "throughput": 5962.43, "total_tokens": 2493248} +{"current_steps": 5080, "total_steps": 37885, "loss": 0.1969, "lr": 1.9929444488305047e-06, "epoch": 0.6704500461924244, "percentage": 13.41, "elapsed_time": "0:06:58", "remaining_time": "0:45:02", "throughput": 5963.71, "total_tokens": 2495744} +{"current_steps": 5085, "total_steps": 37885, "loss": 0.0028, "lr": 1.992889713671913e-06, "epoch": 0.6711099379701729, "percentage": 13.42, "elapsed_time": "0:06:58", "remaining_time": "0:45:01", "throughput": 5964.85, "total_tokens": 2498176} +{"current_steps": 5090, "total_steps": 37885, "loss": 0.0702, "lr": 1.99283476777967e-06, "epoch": 0.6717698297479213, "percentage": 13.44, "elapsed_time": "0:06:59", "remaining_time": "0:45:00", "throughput": 5965.55, "total_tokens": 2500416} +{"current_steps": 5095, "total_steps": 37885, "loss": 0.1533, "lr": 1.9927796111654366e-06, "epoch": 0.6724297215256698, "percentage": 13.45, "elapsed_time": "0:06:59", "remaining_time": "0:44:59", "throughput": 5966.66, "total_tokens": 2502848} +{"current_steps": 5100, "total_steps": 37885, "loss": 0.1575, "lr": 1.99272424384092e-06, "epoch": 0.6730896133034182, "percentage": 13.46, "elapsed_time": "0:06:59", "remaining_time": "0:44:58", "throughput": 5967.52, "total_tokens": 2505152} +{"current_steps": 5105, "total_steps": 37885, "loss": 0.1766, "lr": 1.992668665817871e-06, "epoch": 0.6737495050811667, "percentage": 13.47, "elapsed_time": "0:07:00", "remaining_time": "0:44:57", "throughput": 5968.79, "total_tokens": 2507648} +{"current_steps": 5110, "total_steps": 37885, "loss": 0.0742, "lr": 1.9926128771080867e-06, "epoch": 0.6744093968589151, "percentage": 13.49, "elapsed_time": "0:07:00", "remaining_time": "0:44:56", "throughput": 5970.05, "total_tokens": 2510144} +{"current_steps": 5115, "total_steps": 37885, "loss": 0.2246, "lr": 1.9925568777234067e-06, "epoch": 0.6750692886366636, "percentage": 13.5, "elapsed_time": "0:07:00", "remaining_time": "0:44:55", "throughput": 5972.06, "total_tokens": 2513024} +{"current_steps": 5120, "total_steps": 37885, "loss": 0.0633, "lr": 1.992500667675717e-06, "epoch": 0.675729180414412, "percentage": 13.51, "elapsed_time": "0:07:01", "remaining_time": "0:44:54", "throughput": 5972.3, "total_tokens": 2515072} +{"current_steps": 5125, "total_steps": 37885, "loss": 0.0047, "lr": 1.992444246976948e-06, "epoch": 0.6763890721921605, "percentage": 13.53, "elapsed_time": "0:07:01", "remaining_time": "0:44:54", "throughput": 5973.11, "total_tokens": 2517376} +{"current_steps": 5130, "total_steps": 37885, "loss": 0.0688, "lr": 1.9923876156390743e-06, "epoch": 0.677048963969909, "percentage": 13.54, "elapsed_time": "0:07:01", "remaining_time": "0:44:53", "throughput": 5974.72, "total_tokens": 2520064} +{"current_steps": 5135, "total_steps": 37885, "loss": 0.0592, "lr": 1.992330773674115e-06, "epoch": 0.6777088557476574, "percentage": 13.55, "elapsed_time": "0:07:02", "remaining_time": "0:44:52", "throughput": 5976.28, "total_tokens": 2522688} +{"current_steps": 5140, "total_steps": 37885, "loss": 0.0356, "lr": 1.9922737210941353e-06, "epoch": 0.6783687475254059, "percentage": 13.57, "elapsed_time": "0:07:02", "remaining_time": "0:44:51", "throughput": 5977.51, "total_tokens": 2525184} +{"current_steps": 5145, "total_steps": 37885, "loss": 0.0004, "lr": 1.9922164579112436e-06, "epoch": 0.6790286393031543, "percentage": 13.58, "elapsed_time": "0:07:02", "remaining_time": "0:44:50", "throughput": 5978.5, "total_tokens": 2527552} +{"current_steps": 5150, "total_steps": 37885, "loss": 0.0067, "lr": 1.9921589841375938e-06, "epoch": 0.6796885310809028, "percentage": 13.59, "elapsed_time": "0:07:03", "remaining_time": "0:44:49", "throughput": 5980.17, "total_tokens": 2530240} +{"current_steps": 5155, "total_steps": 37885, "loss": 0.0581, "lr": 1.9921012997853843e-06, "epoch": 0.6803484228586512, "percentage": 13.61, "elapsed_time": "0:07:03", "remaining_time": "0:44:48", "throughput": 5980.87, "total_tokens": 2532480} +{"current_steps": 5160, "total_steps": 37885, "loss": 0.0488, "lr": 1.9920434048668582e-06, "epoch": 0.6810083146363997, "percentage": 13.62, "elapsed_time": "0:07:03", "remaining_time": "0:44:47", "throughput": 5981.94, "total_tokens": 2534912} +{"current_steps": 5165, "total_steps": 37885, "loss": 0.1032, "lr": 1.9919852993943035e-06, "epoch": 0.681668206414148, "percentage": 13.63, "elapsed_time": "0:07:04", "remaining_time": "0:44:46", "throughput": 5983.18, "total_tokens": 2537408} +{"current_steps": 5170, "total_steps": 37885, "loss": 0.3101, "lr": 1.991926983380052e-06, "epoch": 0.6823280981918965, "percentage": 13.65, "elapsed_time": "0:07:04", "remaining_time": "0:44:45", "throughput": 5984.15, "total_tokens": 2539776} +{"current_steps": 5175, "total_steps": 37885, "loss": 0.0739, "lr": 1.9918684568364813e-06, "epoch": 0.6829879899696449, "percentage": 13.66, "elapsed_time": "0:07:04", "remaining_time": "0:44:44", "throughput": 5985.24, "total_tokens": 2542208} +{"current_steps": 5180, "total_steps": 37885, "loss": 0.0663, "lr": 1.9918097197760134e-06, "epoch": 0.6836478817473934, "percentage": 13.67, "elapsed_time": "0:07:05", "remaining_time": "0:44:43", "throughput": 5986.44, "total_tokens": 2544704} +{"current_steps": 5185, "total_steps": 37885, "loss": 0.1005, "lr": 1.9917507722111144e-06, "epoch": 0.6843077735251418, "percentage": 13.69, "elapsed_time": "0:07:05", "remaining_time": "0:44:42", "throughput": 5987.39, "total_tokens": 2547072} +{"current_steps": 5190, "total_steps": 37885, "loss": 0.1848, "lr": 1.9916916141542957e-06, "epoch": 0.6849676653028903, "percentage": 13.7, "elapsed_time": "0:07:05", "remaining_time": "0:44:41", "throughput": 5988.33, "total_tokens": 2549440} +{"current_steps": 5195, "total_steps": 37885, "loss": 0.4134, "lr": 1.991632245618113e-06, "epoch": 0.6856275570806388, "percentage": 13.71, "elapsed_time": "0:07:06", "remaining_time": "0:44:41", "throughput": 5989.98, "total_tokens": 2552128} +{"current_steps": 5200, "total_steps": 37885, "loss": 0.0699, "lr": 1.9915726666151673e-06, "epoch": 0.6862874488583872, "percentage": 13.73, "elapsed_time": "0:07:06", "remaining_time": "0:44:40", "throughput": 5990.64, "total_tokens": 2554368} +{"current_steps": 5205, "total_steps": 37885, "loss": 0.1567, "lr": 1.9915128771581033e-06, "epoch": 0.6869473406361357, "percentage": 13.74, "elapsed_time": "0:07:06", "remaining_time": "0:44:39", "throughput": 5992.03, "total_tokens": 2556928} +{"current_steps": 5210, "total_steps": 37885, "loss": 0.17, "lr": 1.9914528772596113e-06, "epoch": 0.6876072324138841, "percentage": 13.75, "elapsed_time": "0:07:07", "remaining_time": "0:44:38", "throughput": 5993.13, "total_tokens": 2559360} +{"current_steps": 5215, "total_steps": 37885, "loss": 0.2589, "lr": 1.9913926669324253e-06, "epoch": 0.6882671241916326, "percentage": 13.77, "elapsed_time": "0:07:07", "remaining_time": "0:44:37", "throughput": 5994.36, "total_tokens": 2561856} +{"current_steps": 5220, "total_steps": 37885, "loss": 0.2197, "lr": 1.991332246189325e-06, "epoch": 0.688927015969381, "percentage": 13.78, "elapsed_time": "0:07:07", "remaining_time": "0:44:36", "throughput": 5995.59, "total_tokens": 2564352} +{"current_steps": 5225, "total_steps": 37885, "loss": 0.0444, "lr": 1.9912716150431343e-06, "epoch": 0.6895869077471295, "percentage": 13.79, "elapsed_time": "0:07:08", "remaining_time": "0:44:35", "throughput": 5996.63, "total_tokens": 2566784} +{"current_steps": 5230, "total_steps": 37885, "loss": 0.1357, "lr": 1.9912107735067215e-06, "epoch": 0.6902467995248779, "percentage": 13.8, "elapsed_time": "0:07:08", "remaining_time": "0:44:34", "throughput": 5997.6, "total_tokens": 2569152} +{"current_steps": 5235, "total_steps": 37885, "loss": 0.0881, "lr": 1.991149721593e-06, "epoch": 0.6909066913026264, "percentage": 13.82, "elapsed_time": "0:07:08", "remaining_time": "0:44:33", "throughput": 5998.54, "total_tokens": 2571520} +{"current_steps": 5240, "total_steps": 37885, "loss": 0.0346, "lr": 1.991088459314927e-06, "epoch": 0.6915665830803748, "percentage": 13.83, "elapsed_time": "0:07:09", "remaining_time": "0:44:32", "throughput": 5999.91, "total_tokens": 2574080} +{"current_steps": 5245, "total_steps": 37885, "loss": 0.0699, "lr": 1.991026986685506e-06, "epoch": 0.6922264748581233, "percentage": 13.84, "elapsed_time": "0:07:09", "remaining_time": "0:44:31", "throughput": 6000.59, "total_tokens": 2576320} +{"current_steps": 5250, "total_steps": 37885, "loss": 0.073, "lr": 1.9909653037177826e-06, "epoch": 0.6928863666358717, "percentage": 13.86, "elapsed_time": "0:07:09", "remaining_time": "0:44:30", "throughput": 6001.68, "total_tokens": 2578752} +{"current_steps": 5255, "total_steps": 37885, "loss": 0.0697, "lr": 1.9909034104248503e-06, "epoch": 0.6935462584136202, "percentage": 13.87, "elapsed_time": "0:07:09", "remaining_time": "0:44:30", "throughput": 6002.76, "total_tokens": 2581184} +{"current_steps": 5260, "total_steps": 37885, "loss": 0.1952, "lr": 1.9908413068198442e-06, "epoch": 0.6942061501913687, "percentage": 13.88, "elapsed_time": "0:07:10", "remaining_time": "0:44:29", "throughput": 6004.31, "total_tokens": 2583872} +{"current_steps": 5265, "total_steps": 37885, "loss": 0.2296, "lr": 1.990778992915946e-06, "epoch": 0.694866041969117, "percentage": 13.9, "elapsed_time": "0:07:10", "remaining_time": "0:44:28", "throughput": 6005.36, "total_tokens": 2586304} +{"current_steps": 5270, "total_steps": 37885, "loss": 0.1202, "lr": 1.990716468726381e-06, "epoch": 0.6955259337468656, "percentage": 13.91, "elapsed_time": "0:07:10", "remaining_time": "0:44:27", "throughput": 6007.1, "total_tokens": 2589056} +{"current_steps": 5275, "total_steps": 37885, "loss": 0.1517, "lr": 1.9906537342644203e-06, "epoch": 0.6961858255246139, "percentage": 13.92, "elapsed_time": "0:07:11", "remaining_time": "0:44:26", "throughput": 6008.18, "total_tokens": 2591488} +{"current_steps": 5280, "total_steps": 37885, "loss": 0.0183, "lr": 1.990590789543378e-06, "epoch": 0.6968457173023624, "percentage": 13.94, "elapsed_time": "0:07:11", "remaining_time": "0:44:25", "throughput": 6008.97, "total_tokens": 2593792} +{"current_steps": 5285, "total_steps": 37885, "loss": 0.2654, "lr": 1.9905276345766134e-06, "epoch": 0.6975056090801108, "percentage": 13.95, "elapsed_time": "0:07:11", "remaining_time": "0:44:24", "throughput": 6009.5, "total_tokens": 2595968} +{"current_steps": 5290, "total_steps": 37885, "loss": 0.2556, "lr": 1.990464269377532e-06, "epoch": 0.6981655008578593, "percentage": 13.96, "elapsed_time": "0:07:12", "remaining_time": "0:44:23", "throughput": 6010.03, "total_tokens": 2598144} +{"current_steps": 5295, "total_steps": 37885, "loss": 0.0923, "lr": 1.9904006939595815e-06, "epoch": 0.6988253926356077, "percentage": 13.98, "elapsed_time": "0:07:12", "remaining_time": "0:44:22", "throughput": 6010.82, "total_tokens": 2600448} +{"current_steps": 5300, "total_steps": 37885, "loss": 0.0051, "lr": 1.9903369083362554e-06, "epoch": 0.6994852844133562, "percentage": 13.99, "elapsed_time": "0:07:12", "remaining_time": "0:44:21", "throughput": 6012.0, "total_tokens": 2602944} +{"current_steps": 5305, "total_steps": 37885, "loss": 0.1914, "lr": 1.990272912521092e-06, "epoch": 0.7001451761911046, "percentage": 14.0, "elapsed_time": "0:07:13", "remaining_time": "0:44:20", "throughput": 6012.51, "total_tokens": 2605120} +{"current_steps": 5310, "total_steps": 37885, "loss": 0.0594, "lr": 1.990208706527674e-06, "epoch": 0.7008050679688531, "percentage": 14.02, "elapsed_time": "0:07:13", "remaining_time": "0:44:20", "throughput": 6013.04, "total_tokens": 2607296} +{"current_steps": 5315, "total_steps": 37885, "loss": 0.0578, "lr": 1.9901442903696284e-06, "epoch": 0.7014649597466015, "percentage": 14.03, "elapsed_time": "0:07:13", "remaining_time": "0:44:19", "throughput": 6014.11, "total_tokens": 2609728} +{"current_steps": 5320, "total_steps": 37885, "loss": 0.2463, "lr": 1.990079664060628e-06, "epoch": 0.70212485152435, "percentage": 14.04, "elapsed_time": "0:07:14", "remaining_time": "0:44:18", "throughput": 6015.31, "total_tokens": 2612224} +{"current_steps": 5325, "total_steps": 37885, "loss": 0.0071, "lr": 1.9900148276143874e-06, "epoch": 0.7027847433020985, "percentage": 14.06, "elapsed_time": "0:07:14", "remaining_time": "0:44:17", "throughput": 6016.47, "total_tokens": 2614720} +{"current_steps": 5330, "total_steps": 37885, "loss": 0.3149, "lr": 1.9899497810446694e-06, "epoch": 0.7034446350798469, "percentage": 14.07, "elapsed_time": "0:07:14", "remaining_time": "0:44:16", "throughput": 6017.9, "total_tokens": 2617344} +{"current_steps": 5335, "total_steps": 37885, "loss": 0.1851, "lr": 1.989884524365279e-06, "epoch": 0.7041045268575954, "percentage": 14.08, "elapsed_time": "0:07:15", "remaining_time": "0:44:15", "throughput": 6018.55, "total_tokens": 2619584} +{"current_steps": 5340, "total_steps": 37885, "loss": 0.0551, "lr": 1.9898190575900664e-06, "epoch": 0.7047644186353438, "percentage": 14.1, "elapsed_time": "0:07:15", "remaining_time": "0:44:14", "throughput": 6019.31, "total_tokens": 2621888} +{"current_steps": 5345, "total_steps": 37885, "loss": 0.1479, "lr": 1.9897533807329265e-06, "epoch": 0.7054243104130923, "percentage": 14.11, "elapsed_time": "0:07:15", "remaining_time": "0:44:13", "throughput": 6020.79, "total_tokens": 2624512} +{"current_steps": 5350, "total_steps": 37885, "loss": 0.0035, "lr": 1.989687493807799e-06, "epoch": 0.7060842021908407, "percentage": 14.12, "elapsed_time": "0:07:16", "remaining_time": "0:44:12", "throughput": 6021.93, "total_tokens": 2627008} +{"current_steps": 5355, "total_steps": 37885, "loss": 0.0406, "lr": 1.9896213968286672e-06, "epoch": 0.7067440939685892, "percentage": 14.13, "elapsed_time": "0:07:16", "remaining_time": "0:44:12", "throughput": 6022.99, "total_tokens": 2629440} +{"current_steps": 5360, "total_steps": 37885, "loss": 0.1103, "lr": 1.9895550898095606e-06, "epoch": 0.7074039857463376, "percentage": 14.15, "elapsed_time": "0:07:16", "remaining_time": "0:44:11", "throughput": 6024.03, "total_tokens": 2631872} +{"current_steps": 5365, "total_steps": 37885, "loss": 0.1771, "lr": 1.9894885727645516e-06, "epoch": 0.7080638775240861, "percentage": 14.16, "elapsed_time": "0:07:17", "remaining_time": "0:44:10", "throughput": 6025.6, "total_tokens": 2634560} +{"current_steps": 5370, "total_steps": 37885, "loss": 0.2135, "lr": 1.989421845707759e-06, "epoch": 0.7087237693018344, "percentage": 14.17, "elapsed_time": "0:07:17", "remaining_time": "0:44:09", "throughput": 6026.91, "total_tokens": 2637120} +{"current_steps": 5375, "total_steps": 37885, "loss": 0.0021, "lr": 1.989354908653344e-06, "epoch": 0.709383661079583, "percentage": 14.19, "elapsed_time": "0:07:17", "remaining_time": "0:44:08", "throughput": 6027.92, "total_tokens": 2639552} +{"current_steps": 5380, "total_steps": 37885, "loss": 0.1498, "lr": 1.989287761615514e-06, "epoch": 0.7100435528573315, "percentage": 14.2, "elapsed_time": "0:07:18", "remaining_time": "0:44:07", "throughput": 6028.97, "total_tokens": 2641984} +{"current_steps": 5385, "total_steps": 37885, "loss": 0.0816, "lr": 1.9892204046085206e-06, "epoch": 0.7107034446350798, "percentage": 14.21, "elapsed_time": "0:07:18", "remaining_time": "0:44:06", "throughput": 6029.88, "total_tokens": 2644352} +{"current_steps": 5390, "total_steps": 37885, "loss": 0.0261, "lr": 1.98915283764666e-06, "epoch": 0.7113633364128283, "percentage": 14.23, "elapsed_time": "0:07:18", "remaining_time": "0:44:05", "throughput": 6031.46, "total_tokens": 2647040} +{"current_steps": 5395, "total_steps": 37885, "loss": 0.0705, "lr": 1.989085060744272e-06, "epoch": 0.7120232281905767, "percentage": 14.24, "elapsed_time": "0:07:19", "remaining_time": "0:44:04", "throughput": 6032.44, "total_tokens": 2649472} +{"current_steps": 5400, "total_steps": 37885, "loss": 0.0431, "lr": 1.989017073915742e-06, "epoch": 0.7126831199683252, "percentage": 14.25, "elapsed_time": "0:07:19", "remaining_time": "0:44:04", "throughput": 6033.32, "total_tokens": 2651840} +{"current_steps": 5405, "total_steps": 37885, "loss": 0.0093, "lr": 1.9889488771755004e-06, "epoch": 0.7133430117460736, "percentage": 14.27, "elapsed_time": "0:07:19", "remaining_time": "0:44:03", "throughput": 6034.76, "total_tokens": 2654464} +{"current_steps": 5410, "total_steps": 37885, "loss": 0.1071, "lr": 1.9888804705380207e-06, "epoch": 0.7140029035238221, "percentage": 14.28, "elapsed_time": "0:07:20", "remaining_time": "0:44:02", "throughput": 6035.11, "total_tokens": 2656576} +{"current_steps": 5415, "total_steps": 37885, "loss": 0.0828, "lr": 1.9888118540178228e-06, "epoch": 0.7146627953015705, "percentage": 14.29, "elapsed_time": "0:07:20", "remaining_time": "0:44:01", "throughput": 6036.17, "total_tokens": 2659008} +{"current_steps": 5420, "total_steps": 37885, "loss": 0.0466, "lr": 1.9887430276294688e-06, "epoch": 0.715322687079319, "percentage": 14.31, "elapsed_time": "0:07:20", "remaining_time": "0:44:00", "throughput": 6037.62, "total_tokens": 2661632} +{"current_steps": 5425, "total_steps": 37885, "loss": 0.1611, "lr": 1.9886739913875666e-06, "epoch": 0.7159825788570674, "percentage": 14.32, "elapsed_time": "0:07:21", "remaining_time": "0:43:59", "throughput": 6038.91, "total_tokens": 2664192} +{"current_steps": 5430, "total_steps": 37885, "loss": 0.0963, "lr": 1.98860474530677e-06, "epoch": 0.7166424706348159, "percentage": 14.33, "elapsed_time": "0:07:21", "remaining_time": "0:43:58", "throughput": 6039.93, "total_tokens": 2666624} +{"current_steps": 5435, "total_steps": 37885, "loss": 0.1402, "lr": 1.9885352894017745e-06, "epoch": 0.7173023624125643, "percentage": 14.35, "elapsed_time": "0:07:21", "remaining_time": "0:43:57", "throughput": 6041.13, "total_tokens": 2669120} +{"current_steps": 5440, "total_steps": 37885, "loss": 0.2358, "lr": 1.9884656236873224e-06, "epoch": 0.7179622541903128, "percentage": 14.36, "elapsed_time": "0:07:22", "remaining_time": "0:43:57", "throughput": 6042.19, "total_tokens": 2671552} +{"current_steps": 5445, "total_steps": 37885, "loss": 0.1333, "lr": 1.9883957481781998e-06, "epoch": 0.7186221459680613, "percentage": 14.37, "elapsed_time": "0:07:22", "remaining_time": "0:43:56", "throughput": 6043.7, "total_tokens": 2674240} +{"current_steps": 5450, "total_steps": 37885, "loss": 0.1131, "lr": 1.988325662889237e-06, "epoch": 0.7192820377458097, "percentage": 14.39, "elapsed_time": "0:07:22", "remaining_time": "0:43:55", "throughput": 6044.44, "total_tokens": 2676544} +{"current_steps": 5455, "total_steps": 37885, "loss": 0.0009, "lr": 1.988255367835309e-06, "epoch": 0.7199419295235582, "percentage": 14.4, "elapsed_time": "0:07:23", "remaining_time": "0:43:54", "throughput": 6045.32, "total_tokens": 2678912} +{"current_steps": 5460, "total_steps": 37885, "loss": 0.0309, "lr": 1.9881848630313357e-06, "epoch": 0.7206018213013066, "percentage": 14.41, "elapsed_time": "0:07:23", "remaining_time": "0:43:53", "throughput": 6046.35, "total_tokens": 2681344} +{"current_steps": 5465, "total_steps": 37885, "loss": 0.0208, "lr": 1.988114148492281e-06, "epoch": 0.7212617130790551, "percentage": 14.43, "elapsed_time": "0:07:23", "remaining_time": "0:43:52", "throughput": 6047.33, "total_tokens": 2683776} +{"current_steps": 5470, "total_steps": 37885, "loss": 0.1115, "lr": 1.9880432242331534e-06, "epoch": 0.7219216048568035, "percentage": 14.44, "elapsed_time": "0:07:24", "remaining_time": "0:43:51", "throughput": 6047.94, "total_tokens": 2686016} +{"current_steps": 5475, "total_steps": 37885, "loss": 0.1267, "lr": 1.9879720902690067e-06, "epoch": 0.722581496634552, "percentage": 14.45, "elapsed_time": "0:07:24", "remaining_time": "0:43:50", "throughput": 6048.28, "total_tokens": 2688128} +{"current_steps": 5480, "total_steps": 37885, "loss": 0.2031, "lr": 1.987900746614938e-06, "epoch": 0.7232413884123003, "percentage": 14.46, "elapsed_time": "0:07:24", "remaining_time": "0:43:50", "throughput": 6048.91, "total_tokens": 2690368} +{"current_steps": 5485, "total_steps": 37885, "loss": 0.0919, "lr": 1.98782919328609e-06, "epoch": 0.7239012801900488, "percentage": 14.48, "elapsed_time": "0:07:25", "remaining_time": "0:43:49", "throughput": 6050.3, "total_tokens": 2692992} +{"current_steps": 5490, "total_steps": 37885, "loss": 0.0433, "lr": 1.9877574302976484e-06, "epoch": 0.7245611719677972, "percentage": 14.49, "elapsed_time": "0:07:25", "remaining_time": "0:43:48", "throughput": 6051.28, "total_tokens": 2695424} +{"current_steps": 5495, "total_steps": 37885, "loss": 0.1607, "lr": 1.987685457664845e-06, "epoch": 0.7252210637455457, "percentage": 14.5, "elapsed_time": "0:07:25", "remaining_time": "0:43:47", "throughput": 6052.29, "total_tokens": 2697856} +{"current_steps": 5500, "total_steps": 37885, "loss": 0.0006, "lr": 1.987613275402956e-06, "epoch": 0.7258809555232941, "percentage": 14.52, "elapsed_time": "0:07:26", "remaining_time": "0:43:46", "throughput": 6053.92, "total_tokens": 2700608} +{"current_steps": 5505, "total_steps": 37885, "loss": 0.023, "lr": 1.9875408835273007e-06, "epoch": 0.7265408473010426, "percentage": 14.53, "elapsed_time": "0:07:26", "remaining_time": "0:43:45", "throughput": 6055.0, "total_tokens": 2703104} +{"current_steps": 5510, "total_steps": 37885, "loss": 0.1917, "lr": 1.9874682820532444e-06, "epoch": 0.7272007390787911, "percentage": 14.54, "elapsed_time": "0:07:26", "remaining_time": "0:43:44", "throughput": 6055.59, "total_tokens": 2705344} +{"current_steps": 5515, "total_steps": 37885, "loss": 0.0201, "lr": 1.9873954709961956e-06, "epoch": 0.7278606308565395, "percentage": 14.56, "elapsed_time": "0:07:27", "remaining_time": "0:43:44", "throughput": 6056.08, "total_tokens": 2707520} +{"current_steps": 5520, "total_steps": 37885, "loss": 0.1724, "lr": 1.987322450371608e-06, "epoch": 0.728520522634288, "percentage": 14.57, "elapsed_time": "0:07:27", "remaining_time": "0:43:43", "throughput": 6056.92, "total_tokens": 2709888} +{"current_steps": 5525, "total_steps": 37885, "loss": 0.2705, "lr": 1.9872492201949807e-06, "epoch": 0.7291804144120364, "percentage": 14.58, "elapsed_time": "0:07:27", "remaining_time": "0:43:42", "throughput": 6057.64, "total_tokens": 2712192} +{"current_steps": 5530, "total_steps": 37885, "loss": 0.0019, "lr": 1.9871757804818546e-06, "epoch": 0.7298403061897849, "percentage": 14.6, "elapsed_time": "0:07:28", "remaining_time": "0:43:41", "throughput": 6058.09, "total_tokens": 2714368} +{"current_steps": 5535, "total_steps": 37885, "loss": 0.1082, "lr": 1.9871021312478183e-06, "epoch": 0.7305001979675333, "percentage": 14.61, "elapsed_time": "0:07:28", "remaining_time": "0:43:40", "throughput": 6058.68, "total_tokens": 2716608} +{"current_steps": 5540, "total_steps": 37885, "loss": 0.0082, "lr": 1.9870282725085025e-06, "epoch": 0.7311600897452818, "percentage": 14.62, "elapsed_time": "0:07:28", "remaining_time": "0:43:39", "throughput": 6058.86, "total_tokens": 2718656} +{"current_steps": 5545, "total_steps": 37885, "loss": 0.104, "lr": 1.9869542042795832e-06, "epoch": 0.7318199815230302, "percentage": 14.64, "elapsed_time": "0:07:29", "remaining_time": "0:43:38", "throughput": 6059.98, "total_tokens": 2721152} +{"current_steps": 5550, "total_steps": 37885, "loss": 0.0037, "lr": 1.9868799265767814e-06, "epoch": 0.7324798733007787, "percentage": 14.65, "elapsed_time": "0:07:29", "remaining_time": "0:43:38", "throughput": 6060.32, "total_tokens": 2723264} +{"current_steps": 5555, "total_steps": 37885, "loss": 0.268, "lr": 1.986805439415861e-06, "epoch": 0.7331397650785271, "percentage": 14.66, "elapsed_time": "0:07:29", "remaining_time": "0:43:37", "throughput": 6061.05, "total_tokens": 2725568} +{"current_steps": 5560, "total_steps": 37885, "loss": 0.1503, "lr": 1.9867307428126327e-06, "epoch": 0.7337996568562756, "percentage": 14.68, "elapsed_time": "0:07:30", "remaining_time": "0:43:36", "throughput": 6062.43, "total_tokens": 2728192} +{"current_steps": 5565, "total_steps": 37885, "loss": 0.2448, "lr": 1.9866558367829493e-06, "epoch": 0.7344595486340241, "percentage": 14.69, "elapsed_time": "0:07:30", "remaining_time": "0:43:35", "throughput": 6064.3, "total_tokens": 2731072} +{"current_steps": 5570, "total_steps": 37885, "loss": 0.123, "lr": 1.986580721342709e-06, "epoch": 0.7351194404117725, "percentage": 14.7, "elapsed_time": "0:07:30", "remaining_time": "0:43:34", "throughput": 6065.12, "total_tokens": 2733440} +{"current_steps": 5575, "total_steps": 37885, "loss": 0.1279, "lr": 1.986505396507855e-06, "epoch": 0.735779332189521, "percentage": 14.72, "elapsed_time": "0:07:31", "remaining_time": "0:43:33", "throughput": 6066.49, "total_tokens": 2736064} +{"current_steps": 5580, "total_steps": 37885, "loss": 0.0323, "lr": 1.9864298622943747e-06, "epoch": 0.7364392239672694, "percentage": 14.73, "elapsed_time": "0:07:31", "remaining_time": "0:43:33", "throughput": 6067.46, "total_tokens": 2738496} +{"current_steps": 5585, "total_steps": 37885, "loss": 0.0531, "lr": 1.986354118718299e-06, "epoch": 0.7370991157450179, "percentage": 14.74, "elapsed_time": "0:07:31", "remaining_time": "0:43:32", "throughput": 6068.2, "total_tokens": 2740800} +{"current_steps": 5590, "total_steps": 37885, "loss": 0.0734, "lr": 1.9862781657957043e-06, "epoch": 0.7377590075227662, "percentage": 14.76, "elapsed_time": "0:07:31", "remaining_time": "0:43:31", "throughput": 6068.89, "total_tokens": 2743104} +{"current_steps": 5595, "total_steps": 37885, "loss": 0.164, "lr": 1.986202003542711e-06, "epoch": 0.7384188993005147, "percentage": 14.77, "elapsed_time": "0:07:32", "remaining_time": "0:43:30", "throughput": 6069.5, "total_tokens": 2745344} +{"current_steps": 5600, "total_steps": 37885, "loss": 0.0798, "lr": 1.9861256319754836e-06, "epoch": 0.7390787910782631, "percentage": 14.78, "elapsed_time": "0:07:32", "remaining_time": "0:43:29", "throughput": 6069.93, "total_tokens": 2747520} +{"current_steps": 5605, "total_steps": 37885, "loss": 0.0556, "lr": 1.986049051110232e-06, "epoch": 0.7397386828560116, "percentage": 14.79, "elapsed_time": "0:07:32", "remaining_time": "0:43:28", "throughput": 6071.02, "total_tokens": 2750016} +{"current_steps": 5610, "total_steps": 37885, "loss": 0.165, "lr": 1.9859722609632097e-06, "epoch": 0.74039857463376, "percentage": 14.81, "elapsed_time": "0:07:33", "remaining_time": "0:43:27", "throughput": 6072.5, "total_tokens": 2752704} +{"current_steps": 5615, "total_steps": 37885, "loss": 0.1732, "lr": 1.985895261550715e-06, "epoch": 0.7410584664115085, "percentage": 14.82, "elapsed_time": "0:07:33", "remaining_time": "0:43:27", "throughput": 6073.8, "total_tokens": 2755328} +{"current_steps": 5620, "total_steps": 37885, "loss": 0.1728, "lr": 1.9858180528890898e-06, "epoch": 0.7417183581892569, "percentage": 14.83, "elapsed_time": "0:07:33", "remaining_time": "0:43:26", "throughput": 6074.45, "total_tokens": 2757632} +{"current_steps": 5625, "total_steps": 37885, "loss": 0.0655, "lr": 1.985740634994722e-06, "epoch": 0.7423782499670054, "percentage": 14.85, "elapsed_time": "0:07:34", "remaining_time": "0:43:25", "throughput": 6075.7, "total_tokens": 2760192} +{"current_steps": 5630, "total_steps": 37885, "loss": 0.0018, "lr": 1.985663007884043e-06, "epoch": 0.7430381417447539, "percentage": 14.86, "elapsed_time": "0:07:34", "remaining_time": "0:43:24", "throughput": 6077.03, "total_tokens": 2762816} +{"current_steps": 5635, "total_steps": 37885, "loss": 0.0711, "lr": 1.9855851715735275e-06, "epoch": 0.7436980335225023, "percentage": 14.87, "elapsed_time": "0:07:34", "remaining_time": "0:43:23", "throughput": 6077.71, "total_tokens": 2765120} +{"current_steps": 5640, "total_steps": 37885, "loss": 0.0933, "lr": 1.985507126079697e-06, "epoch": 0.7443579253002508, "percentage": 14.89, "elapsed_time": "0:07:35", "remaining_time": "0:43:23", "throughput": 6079.11, "total_tokens": 2767808} +{"current_steps": 5645, "total_steps": 37885, "loss": 0.0009, "lr": 1.985428871419115e-06, "epoch": 0.7450178170779992, "percentage": 14.9, "elapsed_time": "0:07:35", "remaining_time": "0:43:22", "throughput": 6079.95, "total_tokens": 2770176} +{"current_steps": 5650, "total_steps": 37885, "loss": 0.1552, "lr": 1.9853504076083914e-06, "epoch": 0.7456777088557477, "percentage": 14.91, "elapsed_time": "0:07:35", "remaining_time": "0:43:21", "throughput": 6081.04, "total_tokens": 2772672} +{"current_steps": 5655, "total_steps": 37885, "loss": 0.1258, "lr": 1.985271734664179e-06, "epoch": 0.7463376006334961, "percentage": 14.93, "elapsed_time": "0:07:36", "remaining_time": "0:43:20", "throughput": 6081.99, "total_tokens": 2775104} +{"current_steps": 5660, "total_steps": 37885, "loss": 0.3175, "lr": 1.985192852603175e-06, "epoch": 0.7469974924112446, "percentage": 14.94, "elapsed_time": "0:07:36", "remaining_time": "0:43:19", "throughput": 6083.45, "total_tokens": 2777792} +{"current_steps": 5665, "total_steps": 37885, "loss": 0.2089, "lr": 1.9851137614421234e-06, "epoch": 0.747657384188993, "percentage": 14.95, "elapsed_time": "0:07:36", "remaining_time": "0:43:18", "throughput": 6084.78, "total_tokens": 2780416} +{"current_steps": 5670, "total_steps": 37885, "loss": 0.0021, "lr": 1.9850344611978085e-06, "epoch": 0.7483172759667415, "percentage": 14.97, "elapsed_time": "0:07:37", "remaining_time": "0:43:18", "throughput": 6086.49, "total_tokens": 2783232} +{"current_steps": 5675, "total_steps": 37885, "loss": 0.1406, "lr": 1.984954951887063e-06, "epoch": 0.7489771677444899, "percentage": 14.98, "elapsed_time": "0:07:37", "remaining_time": "0:43:17", "throughput": 6087.46, "total_tokens": 2785664} +{"current_steps": 5680, "total_steps": 37885, "loss": 0.0632, "lr": 1.984875233526761e-06, "epoch": 0.7496370595222384, "percentage": 14.99, "elapsed_time": "0:07:37", "remaining_time": "0:43:16", "throughput": 6088.61, "total_tokens": 2788224} +{"current_steps": 5685, "total_steps": 37885, "loss": 0.0028, "lr": 1.984795306133823e-06, "epoch": 0.7502969512999867, "percentage": 15.01, "elapsed_time": "0:07:38", "remaining_time": "0:43:15", "throughput": 6089.57, "total_tokens": 2790656} +{"current_steps": 5685, "total_steps": 37885, "eval_loss": 0.09698151051998138, "epoch": 0.7502969512999867, "percentage": 15.01, "elapsed_time": "0:07:46", "remaining_time": "0:43:59", "throughput": 5987.4, "total_tokens": 2790656} +{"current_steps": 5690, "total_steps": 37885, "loss": 0.0287, "lr": 1.984715169725212e-06, "epoch": 0.7509568430777352, "percentage": 15.02, "elapsed_time": "0:08:25", "remaining_time": "0:47:40", "throughput": 5525.46, "total_tokens": 2792960} +{"current_steps": 5695, "total_steps": 37885, "loss": 0.0862, "lr": 1.9846348243179373e-06, "epoch": 0.7516167348554837, "percentage": 15.03, "elapsed_time": "0:08:25", "remaining_time": "0:47:38", "throughput": 5527.11, "total_tokens": 2795648} +{"current_steps": 5700, "total_steps": 37885, "loss": 0.0883, "lr": 1.9845542699290516e-06, "epoch": 0.7522766266332321, "percentage": 15.05, "elapsed_time": "0:08:26", "remaining_time": "0:47:37", "throughput": 5527.58, "total_tokens": 2797696} +{"current_steps": 5705, "total_steps": 37885, "loss": 0.1298, "lr": 1.9844735065756513e-06, "epoch": 0.7529365184109806, "percentage": 15.06, "elapsed_time": "0:08:26", "remaining_time": "0:47:36", "throughput": 5528.88, "total_tokens": 2800192} +{"current_steps": 5710, "total_steps": 37885, "loss": 0.0658, "lr": 1.984392534274878e-06, "epoch": 0.753596410188729, "percentage": 15.07, "elapsed_time": "0:08:26", "remaining_time": "0:47:35", "throughput": 5529.93, "total_tokens": 2802560} +{"current_steps": 5715, "total_steps": 37885, "loss": 0.2382, "lr": 1.9843113530439184e-06, "epoch": 0.7542563019664775, "percentage": 15.09, "elapsed_time": "0:08:27", "remaining_time": "0:47:34", "throughput": 5531.1, "total_tokens": 2804992} +{"current_steps": 5720, "total_steps": 37885, "loss": 0.2144, "lr": 1.9842299629000014e-06, "epoch": 0.7549161937442259, "percentage": 15.1, "elapsed_time": "0:08:27", "remaining_time": "0:47:33", "throughput": 5532.08, "total_tokens": 2807296} +{"current_steps": 5725, "total_steps": 37885, "loss": 0.1445, "lr": 1.9841483638604025e-06, "epoch": 0.7555760855219744, "percentage": 15.11, "elapsed_time": "0:08:27", "remaining_time": "0:47:32", "throughput": 5533.78, "total_tokens": 2809984} +{"current_steps": 5730, "total_steps": 37885, "loss": 0.0021, "lr": 1.9840665559424395e-06, "epoch": 0.7562359772997228, "percentage": 15.12, "elapsed_time": "0:08:28", "remaining_time": "0:47:31", "throughput": 5535.52, "total_tokens": 2812736} +{"current_steps": 5735, "total_steps": 37885, "loss": 0.1602, "lr": 1.9839845391634764e-06, "epoch": 0.7568958690774713, "percentage": 15.14, "elapsed_time": "0:08:28", "remaining_time": "0:47:30", "throughput": 5536.5, "total_tokens": 2815040} +{"current_steps": 5740, "total_steps": 37885, "loss": 0.1313, "lr": 1.9839023135409203e-06, "epoch": 0.7575557608552197, "percentage": 15.15, "elapsed_time": "0:08:28", "remaining_time": "0:47:29", "throughput": 5537.45, "total_tokens": 2817344} +{"current_steps": 5745, "total_steps": 37885, "loss": 0.0919, "lr": 1.983819879092223e-06, "epoch": 0.7582156526329682, "percentage": 15.16, "elapsed_time": "0:08:29", "remaining_time": "0:47:28", "throughput": 5538.4, "total_tokens": 2819648} +{"current_steps": 5750, "total_steps": 37885, "loss": 0.2254, "lr": 1.9837372358348804e-06, "epoch": 0.7588755444107167, "percentage": 15.18, "elapsed_time": "0:08:29", "remaining_time": "0:47:27", "throughput": 5540.22, "total_tokens": 2822464} +{"current_steps": 5755, "total_steps": 37885, "loss": 0.1121, "lr": 1.9836543837864332e-06, "epoch": 0.7595354361884651, "percentage": 15.19, "elapsed_time": "0:08:29", "remaining_time": "0:47:26", "throughput": 5541.38, "total_tokens": 2824896} +{"current_steps": 5760, "total_steps": 37885, "loss": 0.1378, "lr": 1.9835713229644663e-06, "epoch": 0.7601953279662136, "percentage": 15.2, "elapsed_time": "0:08:30", "remaining_time": "0:47:25", "throughput": 5543.17, "total_tokens": 2827648} +{"current_steps": 5765, "total_steps": 37885, "loss": 0.1264, "lr": 1.983488053386608e-06, "epoch": 0.760855219743962, "percentage": 15.22, "elapsed_time": "0:08:30", "remaining_time": "0:47:23", "throughput": 5544.83, "total_tokens": 2830336} +{"current_steps": 5770, "total_steps": 37885, "loss": 0.039, "lr": 1.983404575070533e-06, "epoch": 0.7615151115217105, "percentage": 15.23, "elapsed_time": "0:08:30", "remaining_time": "0:47:22", "throughput": 5545.79, "total_tokens": 2832640} +{"current_steps": 5775, "total_steps": 37885, "loss": 0.0268, "lr": 1.9833208880339576e-06, "epoch": 0.7621750032994589, "percentage": 15.24, "elapsed_time": "0:08:31", "remaining_time": "0:47:21", "throughput": 5546.63, "total_tokens": 2834880} +{"current_steps": 5780, "total_steps": 37885, "loss": 0.1555, "lr": 1.983236992294645e-06, "epoch": 0.7628348950772074, "percentage": 15.26, "elapsed_time": "0:08:31", "remaining_time": "0:47:20", "throughput": 5548.02, "total_tokens": 2837440} +{"current_steps": 5785, "total_steps": 37885, "loss": 0.1095, "lr": 1.9831528878704003e-06, "epoch": 0.7634947868549558, "percentage": 15.27, "elapsed_time": "0:08:31", "remaining_time": "0:47:19", "throughput": 5549.08, "total_tokens": 2839808} +{"current_steps": 5790, "total_steps": 37885, "loss": 0.1398, "lr": 1.983068574779075e-06, "epoch": 0.7641546786327043, "percentage": 15.28, "elapsed_time": "0:08:32", "remaining_time": "0:47:18", "throughput": 5550.61, "total_tokens": 2842432} +{"current_steps": 5795, "total_steps": 37885, "loss": 0.1598, "lr": 1.9829840530385633e-06, "epoch": 0.7648145704104526, "percentage": 15.3, "elapsed_time": "0:08:32", "remaining_time": "0:47:17", "throughput": 5552.26, "total_tokens": 2845120} +{"current_steps": 5800, "total_steps": 37885, "loss": 0.0721, "lr": 1.9828993226668046e-06, "epoch": 0.7654744621882011, "percentage": 15.31, "elapsed_time": "0:08:32", "remaining_time": "0:47:16", "throughput": 5554.23, "total_tokens": 2848000} +{"current_steps": 5805, "total_steps": 37885, "loss": 0.1805, "lr": 1.982814383681782e-06, "epoch": 0.7661343539659495, "percentage": 15.32, "elapsed_time": "0:08:33", "remaining_time": "0:47:15", "throughput": 5555.72, "total_tokens": 2850624} +{"current_steps": 5810, "total_steps": 37885, "loss": 0.1815, "lr": 1.9827292361015235e-06, "epoch": 0.766794245743698, "percentage": 15.34, "elapsed_time": "0:08:33", "remaining_time": "0:47:14", "throughput": 5556.77, "total_tokens": 2852992} +{"current_steps": 5815, "total_steps": 37885, "loss": 0.0437, "lr": 1.9826438799441016e-06, "epoch": 0.7674541375214465, "percentage": 15.35, "elapsed_time": "0:08:33", "remaining_time": "0:47:13", "throughput": 5557.93, "total_tokens": 2855424} +{"current_steps": 5820, "total_steps": 37885, "loss": 0.147, "lr": 1.982558315227631e-06, "epoch": 0.7681140292991949, "percentage": 15.36, "elapsed_time": "0:08:34", "remaining_time": "0:47:12", "throughput": 5559.31, "total_tokens": 2857984} +{"current_steps": 5825, "total_steps": 37885, "loss": 0.0712, "lr": 1.982472541970274e-06, "epoch": 0.7687739210769434, "percentage": 15.38, "elapsed_time": "0:08:34", "remaining_time": "0:47:11", "throughput": 5560.94, "total_tokens": 2860672} +{"current_steps": 5830, "total_steps": 37885, "loss": 0.21, "lr": 1.9823865601902337e-06, "epoch": 0.7694338128546918, "percentage": 15.39, "elapsed_time": "0:08:34", "remaining_time": "0:47:10", "throughput": 5561.95, "total_tokens": 2863040} +{"current_steps": 5835, "total_steps": 37885, "loss": 0.1239, "lr": 1.9823003699057607e-06, "epoch": 0.7700937046324403, "percentage": 15.4, "elapsed_time": "0:08:35", "remaining_time": "0:47:09", "throughput": 5563.79, "total_tokens": 2865856} +{"current_steps": 5840, "total_steps": 37885, "loss": 0.1, "lr": 1.9822139711351465e-06, "epoch": 0.7707535964101887, "percentage": 15.42, "elapsed_time": "0:08:35", "remaining_time": "0:47:08", "throughput": 5564.58, "total_tokens": 2868096} +{"current_steps": 5845, "total_steps": 37885, "loss": 0.0024, "lr": 1.9821273638967304e-06, "epoch": 0.7714134881879372, "percentage": 15.43, "elapsed_time": "0:08:35", "remaining_time": "0:47:07", "throughput": 5566.21, "total_tokens": 2870784} +{"current_steps": 5850, "total_steps": 37885, "loss": 0.0828, "lr": 1.9820405482088927e-06, "epoch": 0.7720733799656856, "percentage": 15.44, "elapsed_time": "0:08:36", "remaining_time": "0:47:06", "throughput": 5567.37, "total_tokens": 2873216} +{"current_steps": 5855, "total_steps": 37885, "loss": 0.001, "lr": 1.9819535240900606e-06, "epoch": 0.7727332717434341, "percentage": 15.45, "elapsed_time": "0:08:36", "remaining_time": "0:47:05", "throughput": 5568.75, "total_tokens": 2875776} +{"current_steps": 5860, "total_steps": 37885, "loss": 0.0624, "lr": 1.9818662915587036e-06, "epoch": 0.7733931635211825, "percentage": 15.47, "elapsed_time": "0:08:36", "remaining_time": "0:47:04", "throughput": 5570.13, "total_tokens": 2878336} +{"current_steps": 5865, "total_steps": 37885, "loss": 0.2229, "lr": 1.981778850633336e-06, "epoch": 0.774053055298931, "percentage": 15.48, "elapsed_time": "0:08:37", "remaining_time": "0:47:02", "throughput": 5571.5, "total_tokens": 2880896} +{"current_steps": 5870, "total_steps": 37885, "loss": 0.0652, "lr": 1.981691201332517e-06, "epoch": 0.7747129470766794, "percentage": 15.49, "elapsed_time": "0:08:37", "remaining_time": "0:47:01", "throughput": 5573.2, "total_tokens": 2883648} +{"current_steps": 5875, "total_steps": 37885, "loss": 0.0585, "lr": 1.9816033436748495e-06, "epoch": 0.7753728388544279, "percentage": 15.51, "elapsed_time": "0:08:37", "remaining_time": "0:47:00", "throughput": 5574.1, "total_tokens": 2885952} +{"current_steps": 5880, "total_steps": 37885, "loss": 0.079, "lr": 1.98151527767898e-06, "epoch": 0.7760327306321764, "percentage": 15.52, "elapsed_time": "0:08:38", "remaining_time": "0:46:59", "throughput": 5575.6, "total_tokens": 2888576} +{"current_steps": 5885, "total_steps": 37885, "loss": 0.1387, "lr": 1.981427003363601e-06, "epoch": 0.7766926224099248, "percentage": 15.53, "elapsed_time": "0:08:38", "remaining_time": "0:46:58", "throughput": 5576.99, "total_tokens": 2891136} +{"current_steps": 5890, "total_steps": 37885, "loss": 0.1429, "lr": 1.9813385207474472e-06, "epoch": 0.7773525141876733, "percentage": 15.55, "elapsed_time": "0:08:38", "remaining_time": "0:46:57", "throughput": 5578.38, "total_tokens": 2893696} +{"current_steps": 5895, "total_steps": 37885, "loss": 0.0546, "lr": 1.981249829849299e-06, "epoch": 0.7780124059654216, "percentage": 15.56, "elapsed_time": "0:08:39", "remaining_time": "0:46:56", "throughput": 5580.16, "total_tokens": 2896512} +{"current_steps": 5900, "total_steps": 37885, "loss": 0.1847, "lr": 1.9811609306879798e-06, "epoch": 0.7786722977431701, "percentage": 15.57, "elapsed_time": "0:08:39", "remaining_time": "0:46:55", "throughput": 5581.43, "total_tokens": 2899008} +{"current_steps": 5905, "total_steps": 37885, "loss": 0.0416, "lr": 1.9810718232823584e-06, "epoch": 0.7793321895209185, "percentage": 15.59, "elapsed_time": "0:08:39", "remaining_time": "0:46:54", "throughput": 5582.49, "total_tokens": 2901376} +{"current_steps": 5910, "total_steps": 37885, "loss": 0.2391, "lr": 1.9809825076513462e-06, "epoch": 0.779992081298667, "percentage": 15.6, "elapsed_time": "0:08:40", "remaining_time": "0:46:53", "throughput": 5583.78, "total_tokens": 2903872} +{"current_steps": 5915, "total_steps": 37885, "loss": 0.0021, "lr": 1.980892983813901e-06, "epoch": 0.7806519730764154, "percentage": 15.61, "elapsed_time": "0:08:40", "remaining_time": "0:46:52", "throughput": 5584.81, "total_tokens": 2906240} +{"current_steps": 5920, "total_steps": 37885, "loss": 0.1206, "lr": 1.980803251789023e-06, "epoch": 0.7813118648541639, "percentage": 15.63, "elapsed_time": "0:08:40", "remaining_time": "0:46:51", "throughput": 5586.08, "total_tokens": 2908736} +{"current_steps": 5925, "total_steps": 37885, "loss": 0.1592, "lr": 1.980713311595757e-06, "epoch": 0.7819717566319123, "percentage": 15.64, "elapsed_time": "0:08:41", "remaining_time": "0:46:50", "throughput": 5587.1, "total_tokens": 2911104} +{"current_steps": 5930, "total_steps": 37885, "loss": 0.0998, "lr": 1.980623163253192e-06, "epoch": 0.7826316484096608, "percentage": 15.65, "elapsed_time": "0:08:41", "remaining_time": "0:46:49", "throughput": 5588.15, "total_tokens": 2913472} +{"current_steps": 5935, "total_steps": 37885, "loss": 0.1875, "lr": 1.9805328067804626e-06, "epoch": 0.7832915401874093, "percentage": 15.67, "elapsed_time": "0:08:41", "remaining_time": "0:46:48", "throughput": 5589.16, "total_tokens": 2915840} +{"current_steps": 5940, "total_steps": 37885, "loss": 0.0014, "lr": 1.980442242196745e-06, "epoch": 0.7839514319651577, "percentage": 15.68, "elapsed_time": "0:08:42", "remaining_time": "0:46:47", "throughput": 5590.08, "total_tokens": 2918144} +{"current_steps": 5945, "total_steps": 37885, "loss": 0.1515, "lr": 1.9803514695212613e-06, "epoch": 0.7846113237429062, "percentage": 15.69, "elapsed_time": "0:08:42", "remaining_time": "0:46:46", "throughput": 5591.59, "total_tokens": 2920768} +{"current_steps": 5950, "total_steps": 37885, "loss": 0.093, "lr": 1.9802604887732773e-06, "epoch": 0.7852712155206546, "percentage": 15.71, "elapsed_time": "0:08:42", "remaining_time": "0:46:45", "throughput": 5592.59, "total_tokens": 2923136} +{"current_steps": 5955, "total_steps": 37885, "loss": 0.1338, "lr": 1.980169299972103e-06, "epoch": 0.7859311072984031, "percentage": 15.72, "elapsed_time": "0:08:43", "remaining_time": "0:46:44", "throughput": 5593.73, "total_tokens": 2925568} +{"current_steps": 5960, "total_steps": 37885, "loss": 0.0132, "lr": 1.980077903137093e-06, "epoch": 0.7865909990761515, "percentage": 15.73, "elapsed_time": "0:08:43", "remaining_time": "0:46:43", "throughput": 5594.9, "total_tokens": 2928064} +{"current_steps": 5965, "total_steps": 37885, "loss": 0.1477, "lr": 1.979986298287645e-06, "epoch": 0.7872508908539, "percentage": 15.75, "elapsed_time": "0:08:43", "remaining_time": "0:46:42", "throughput": 5595.71, "total_tokens": 2930368} +{"current_steps": 5970, "total_steps": 37885, "loss": 0.0939, "lr": 1.979894485443201e-06, "epoch": 0.7879107826316484, "percentage": 15.76, "elapsed_time": "0:08:44", "remaining_time": "0:46:41", "throughput": 5597.05, "total_tokens": 2932928} +{"current_steps": 5975, "total_steps": 37885, "loss": 0.2729, "lr": 1.9798024646232495e-06, "epoch": 0.7885706744093969, "percentage": 15.77, "elapsed_time": "0:08:44", "remaining_time": "0:46:40", "throughput": 5598.19, "total_tokens": 2935360} +{"current_steps": 5980, "total_steps": 37885, "loss": 0.0693, "lr": 1.9797102358473195e-06, "epoch": 0.7892305661871453, "percentage": 15.78, "elapsed_time": "0:08:44", "remaining_time": "0:46:39", "throughput": 5599.46, "total_tokens": 2937920} +{"current_steps": 5985, "total_steps": 37885, "loss": 0.2579, "lr": 1.979617799134986e-06, "epoch": 0.7898904579648938, "percentage": 15.8, "elapsed_time": "0:08:45", "remaining_time": "0:46:38", "throughput": 5600.38, "total_tokens": 2940224} +{"current_steps": 5990, "total_steps": 37885, "loss": 0.0073, "lr": 1.979525154505869e-06, "epoch": 0.7905503497426422, "percentage": 15.81, "elapsed_time": "0:08:45", "remaining_time": "0:46:37", "throughput": 5601.82, "total_tokens": 2942848} +{"current_steps": 5995, "total_steps": 37885, "loss": 0.1256, "lr": 1.979432301979631e-06, "epoch": 0.7912102415203907, "percentage": 15.82, "elapsed_time": "0:08:45", "remaining_time": "0:46:36", "throughput": 5603.07, "total_tokens": 2945344} +{"current_steps": 6000, "total_steps": 37885, "loss": 0.0084, "lr": 1.9793392415759796e-06, "epoch": 0.7918701332981392, "percentage": 15.84, "elapsed_time": "0:08:46", "remaining_time": "0:46:35", "throughput": 5604.25, "total_tokens": 2947840} +{"current_steps": 6005, "total_steps": 37885, "loss": 0.1543, "lr": 1.979245973314666e-06, "epoch": 0.7925300250758875, "percentage": 15.85, "elapsed_time": "0:08:46", "remaining_time": "0:46:34", "throughput": 5605.11, "total_tokens": 2950144} +{"current_steps": 6010, "total_steps": 37885, "loss": 0.0351, "lr": 1.9791524972154856e-06, "epoch": 0.793189916853636, "percentage": 15.86, "elapsed_time": "0:08:46", "remaining_time": "0:46:33", "throughput": 5605.88, "total_tokens": 2952384} +{"current_steps": 6015, "total_steps": 37885, "loss": 0.2565, "lr": 1.979058813298278e-06, "epoch": 0.7938498086313844, "percentage": 15.88, "elapsed_time": "0:08:46", "remaining_time": "0:46:32", "throughput": 5607.48, "total_tokens": 2955136} +{"current_steps": 6020, "total_steps": 37885, "loss": 0.0011, "lr": 1.978964921582927e-06, "epoch": 0.7945097004091329, "percentage": 15.89, "elapsed_time": "0:08:47", "remaining_time": "0:46:31", "throughput": 5609.04, "total_tokens": 2957824} +{"current_steps": 6025, "total_steps": 37885, "loss": 0.063, "lr": 1.9788708220893608e-06, "epoch": 0.7951695921868813, "percentage": 15.9, "elapsed_time": "0:08:47", "remaining_time": "0:46:30", "throughput": 5610.12, "total_tokens": 2960256} +{"current_steps": 6030, "total_steps": 37885, "loss": 0.19, "lr": 1.9787765148375506e-06, "epoch": 0.7958294839646298, "percentage": 15.92, "elapsed_time": "0:08:48", "remaining_time": "0:46:30", "throughput": 5608.27, "total_tokens": 2962944} +{"current_steps": 6035, "total_steps": 37885, "loss": 0.2955, "lr": 1.978681999847513e-06, "epoch": 0.7964893757423782, "percentage": 15.93, "elapsed_time": "0:08:48", "remaining_time": "0:46:29", "throughput": 5609.61, "total_tokens": 2965504} +{"current_steps": 6040, "total_steps": 37885, "loss": 0.3805, "lr": 1.9785872771393084e-06, "epoch": 0.7971492675201267, "percentage": 15.94, "elapsed_time": "0:08:48", "remaining_time": "0:46:28", "throughput": 5610.34, "total_tokens": 2967744} +{"current_steps": 6045, "total_steps": 37885, "loss": 0.0549, "lr": 1.9784923467330403e-06, "epoch": 0.7978091592978751, "percentage": 15.96, "elapsed_time": "0:08:49", "remaining_time": "0:46:27", "throughput": 5611.46, "total_tokens": 2970240} +{"current_steps": 6050, "total_steps": 37885, "loss": 0.2836, "lr": 1.9783972086488573e-06, "epoch": 0.7984690510756236, "percentage": 15.97, "elapsed_time": "0:08:49", "remaining_time": "0:46:27", "throughput": 5612.94, "total_tokens": 2972928} +{"current_steps": 6055, "total_steps": 37885, "loss": 0.0879, "lr": 1.9783018629069516e-06, "epoch": 0.799128942853372, "percentage": 15.98, "elapsed_time": "0:08:49", "remaining_time": "0:46:26", "throughput": 5613.68, "total_tokens": 2975168} +{"current_steps": 6060, "total_steps": 37885, "loss": 0.1121, "lr": 1.97820630952756e-06, "epoch": 0.7997888346311205, "percentage": 16.0, "elapsed_time": "0:08:50", "remaining_time": "0:46:25", "throughput": 5614.4, "total_tokens": 2977408} +{"current_steps": 6065, "total_steps": 37885, "loss": 0.0025, "lr": 1.978110548530963e-06, "epoch": 0.800448726408869, "percentage": 16.01, "elapsed_time": "0:08:50", "remaining_time": "0:46:24", "throughput": 5615.68, "total_tokens": 2979968} +{"current_steps": 6070, "total_steps": 37885, "loss": 0.2776, "lr": 1.9780145799374846e-06, "epoch": 0.8011086181866174, "percentage": 16.02, "elapsed_time": "0:08:50", "remaining_time": "0:46:23", "throughput": 5616.91, "total_tokens": 2982528} +{"current_steps": 6075, "total_steps": 37885, "loss": 0.0743, "lr": 1.977918403767494e-06, "epoch": 0.8017685099643659, "percentage": 16.04, "elapsed_time": "0:08:51", "remaining_time": "0:46:22", "throughput": 5617.75, "total_tokens": 2984832} +{"current_steps": 6080, "total_steps": 37885, "loss": 0.0383, "lr": 1.9778220200414036e-06, "epoch": 0.8024284017421143, "percentage": 16.05, "elapsed_time": "0:08:51", "remaining_time": "0:46:21", "throughput": 5618.93, "total_tokens": 2987328} +{"current_steps": 6085, "total_steps": 37885, "loss": 0.0037, "lr": 1.9777254287796706e-06, "epoch": 0.8030882935198628, "percentage": 16.06, "elapsed_time": "0:08:51", "remaining_time": "0:46:20", "throughput": 5619.94, "total_tokens": 2989760} +{"current_steps": 6090, "total_steps": 37885, "loss": 0.0012, "lr": 1.9776286300027954e-06, "epoch": 0.8037481852976112, "percentage": 16.07, "elapsed_time": "0:08:52", "remaining_time": "0:46:19", "throughput": 5621.22, "total_tokens": 2992320} +{"current_steps": 6095, "total_steps": 37885, "loss": 0.1335, "lr": 1.9775316237313225e-06, "epoch": 0.8044080770753597, "percentage": 16.09, "elapsed_time": "0:08:52", "remaining_time": "0:46:18", "throughput": 5622.91, "total_tokens": 2995136} +{"current_steps": 6100, "total_steps": 37885, "loss": 0.0003, "lr": 1.977434409985842e-06, "epoch": 0.805067968853108, "percentage": 16.1, "elapsed_time": "0:08:53", "remaining_time": "0:46:17", "throughput": 5624.71, "total_tokens": 2998016} +{"current_steps": 6105, "total_steps": 37885, "loss": 0.2727, "lr": 1.977336988786985e-06, "epoch": 0.8057278606308566, "percentage": 16.11, "elapsed_time": "0:08:53", "remaining_time": "0:46:16", "throughput": 5626.4, "total_tokens": 3000832} +{"current_steps": 6110, "total_steps": 37885, "loss": 0.2474, "lr": 1.97723936015543e-06, "epoch": 0.8063877524086049, "percentage": 16.13, "elapsed_time": "0:08:53", "remaining_time": "0:46:15", "throughput": 5627.97, "total_tokens": 3003584} +{"current_steps": 6115, "total_steps": 37885, "loss": 0.0878, "lr": 1.9771415241118972e-06, "epoch": 0.8070476441863534, "percentage": 16.14, "elapsed_time": "0:08:54", "remaining_time": "0:46:14", "throughput": 5629.8, "total_tokens": 3006464} +{"current_steps": 6120, "total_steps": 37885, "loss": 0.1026, "lr": 1.9770434806771525e-06, "epoch": 0.8077075359641019, "percentage": 16.15, "elapsed_time": "0:08:54", "remaining_time": "0:46:13", "throughput": 5630.88, "total_tokens": 3008896} +{"current_steps": 6125, "total_steps": 37885, "loss": 0.0518, "lr": 1.976945229872003e-06, "epoch": 0.8083674277418503, "percentage": 16.17, "elapsed_time": "0:08:54", "remaining_time": "0:46:12", "throughput": 5632.03, "total_tokens": 3011392} +{"current_steps": 6130, "total_steps": 37885, "loss": 0.2063, "lr": 1.976846771717304e-06, "epoch": 0.8090273195195988, "percentage": 16.18, "elapsed_time": "0:08:55", "remaining_time": "0:46:11", "throughput": 5633.39, "total_tokens": 3014016} +{"current_steps": 6135, "total_steps": 37885, "loss": 0.1909, "lr": 1.9767481062339512e-06, "epoch": 0.8096872112973472, "percentage": 16.19, "elapsed_time": "0:08:55", "remaining_time": "0:46:10", "throughput": 5634.63, "total_tokens": 3016576} +{"current_steps": 6140, "total_steps": 37885, "loss": 0.0153, "lr": 1.976649233442886e-06, "epoch": 0.8103471030750957, "percentage": 16.21, "elapsed_time": "0:08:55", "remaining_time": "0:46:09", "throughput": 5635.68, "total_tokens": 3019008} +{"current_steps": 6145, "total_steps": 37885, "loss": 0.0678, "lr": 1.976550153365093e-06, "epoch": 0.8110069948528441, "percentage": 16.22, "elapsed_time": "0:08:56", "remaining_time": "0:46:08", "throughput": 5636.84, "total_tokens": 3021504} +{"current_steps": 6150, "total_steps": 37885, "loss": 0.0594, "lr": 1.9764508660216018e-06, "epoch": 0.8116668866305926, "percentage": 16.23, "elapsed_time": "0:08:56", "remaining_time": "0:46:07", "throughput": 5637.19, "total_tokens": 3023552} +{"current_steps": 6155, "total_steps": 37885, "loss": 0.1778, "lr": 1.976351371433485e-06, "epoch": 0.812326778408341, "percentage": 16.25, "elapsed_time": "0:08:56", "remaining_time": "0:46:06", "throughput": 5638.01, "total_tokens": 3025856} +{"current_steps": 6160, "total_steps": 37885, "loss": 0.2057, "lr": 1.9762516696218598e-06, "epoch": 0.8129866701860895, "percentage": 16.26, "elapsed_time": "0:08:57", "remaining_time": "0:46:05", "throughput": 5638.69, "total_tokens": 3028096} +{"current_steps": 6165, "total_steps": 37885, "loss": 0.3517, "lr": 1.9761517606078873e-06, "epoch": 0.8136465619638379, "percentage": 16.27, "elapsed_time": "0:08:57", "remaining_time": "0:46:04", "throughput": 5639.66, "total_tokens": 3030528} +{"current_steps": 6170, "total_steps": 37885, "loss": 0.2465, "lr": 1.9760516444127722e-06, "epoch": 0.8143064537415864, "percentage": 16.29, "elapsed_time": "0:08:57", "remaining_time": "0:46:03", "throughput": 5640.91, "total_tokens": 3033088} +{"current_steps": 6175, "total_steps": 37885, "loss": 0.0653, "lr": 1.975951321057764e-06, "epoch": 0.8149663455193348, "percentage": 16.3, "elapsed_time": "0:08:58", "remaining_time": "0:46:02", "throughput": 5641.34, "total_tokens": 3035200} +{"current_steps": 6180, "total_steps": 37885, "loss": 0.0755, "lr": 1.975850790564155e-06, "epoch": 0.8156262372970833, "percentage": 16.31, "elapsed_time": "0:08:58", "remaining_time": "0:46:01", "throughput": 5642.48, "total_tokens": 3037696} +{"current_steps": 6185, "total_steps": 37885, "loss": 0.1064, "lr": 1.9757500529532817e-06, "epoch": 0.8162861290748318, "percentage": 16.33, "elapsed_time": "0:08:58", "remaining_time": "0:46:00", "throughput": 5643.52, "total_tokens": 3040128} +{"current_steps": 6190, "total_steps": 37885, "loss": 0.1667, "lr": 1.975649108246526e-06, "epoch": 0.8169460208525802, "percentage": 16.34, "elapsed_time": "0:08:59", "remaining_time": "0:46:00", "throughput": 5644.54, "total_tokens": 3042560} +{"current_steps": 6195, "total_steps": 37885, "loss": 0.2541, "lr": 1.9755479564653123e-06, "epoch": 0.8176059126303287, "percentage": 16.35, "elapsed_time": "0:08:59", "remaining_time": "0:45:59", "throughput": 5645.24, "total_tokens": 3044800} +{"current_steps": 6200, "total_steps": 37885, "loss": 0.124, "lr": 1.975446597631109e-06, "epoch": 0.8182658044080771, "percentage": 16.37, "elapsed_time": "0:08:59", "remaining_time": "0:45:58", "throughput": 5645.96, "total_tokens": 3047040} +{"current_steps": 6205, "total_steps": 37885, "loss": 0.064, "lr": 1.975345031765429e-06, "epoch": 0.8189256961858256, "percentage": 16.38, "elapsed_time": "0:09:00", "remaining_time": "0:45:57", "throughput": 5647.19, "total_tokens": 3049600} +{"current_steps": 6210, "total_steps": 37885, "loss": 0.1276, "lr": 1.975243258889829e-06, "epoch": 0.819585587963574, "percentage": 16.39, "elapsed_time": "0:09:00", "remaining_time": "0:45:56", "throughput": 5648.83, "total_tokens": 3052416} +{"current_steps": 6215, "total_steps": 37885, "loss": 0.0928, "lr": 1.9751412790259093e-06, "epoch": 0.8202454797413224, "percentage": 16.4, "elapsed_time": "0:09:00", "remaining_time": "0:45:55", "throughput": 5650.16, "total_tokens": 3055040} +{"current_steps": 6220, "total_steps": 37885, "loss": 0.0983, "lr": 1.9750390921953144e-06, "epoch": 0.8209053715190708, "percentage": 16.42, "elapsed_time": "0:09:01", "remaining_time": "0:45:54", "throughput": 5651.84, "total_tokens": 3057856} +{"current_steps": 6225, "total_steps": 37885, "loss": 0.2008, "lr": 1.9749366984197335e-06, "epoch": 0.8215652632968193, "percentage": 16.43, "elapsed_time": "0:09:01", "remaining_time": "0:45:53", "throughput": 5652.64, "total_tokens": 3060160} +{"current_steps": 6230, "total_steps": 37885, "loss": 0.1972, "lr": 1.9748340977208975e-06, "epoch": 0.8222251550745677, "percentage": 16.44, "elapsed_time": "0:09:01", "remaining_time": "0:45:52", "throughput": 5653.6, "total_tokens": 3062592} +{"current_steps": 6235, "total_steps": 37885, "loss": 0.0591, "lr": 1.9747312901205837e-06, "epoch": 0.8228850468523162, "percentage": 16.46, "elapsed_time": "0:09:02", "remaining_time": "0:45:51", "throughput": 5654.68, "total_tokens": 3065088} +{"current_steps": 6240, "total_steps": 37885, "loss": 0.0013, "lr": 1.9746282756406126e-06, "epoch": 0.8235449386300646, "percentage": 16.47, "elapsed_time": "0:09:02", "remaining_time": "0:45:50", "throughput": 5655.95, "total_tokens": 3067712} +{"current_steps": 6245, "total_steps": 37885, "loss": 0.1508, "lr": 1.974525054302847e-06, "epoch": 0.8242048304078131, "percentage": 16.48, "elapsed_time": "0:09:02", "remaining_time": "0:45:49", "throughput": 5656.94, "total_tokens": 3070144} +{"current_steps": 6250, "total_steps": 37885, "loss": 0.2101, "lr": 1.974421626129196e-06, "epoch": 0.8248647221855616, "percentage": 16.5, "elapsed_time": "0:09:03", "remaining_time": "0:45:48", "throughput": 5657.69, "total_tokens": 3072448} +{"current_steps": 6255, "total_steps": 37885, "loss": 0.1979, "lr": 1.9743179911416104e-06, "epoch": 0.82552461396331, "percentage": 16.51, "elapsed_time": "0:09:03", "remaining_time": "0:45:47", "throughput": 5658.95, "total_tokens": 3075072} +{"current_steps": 6260, "total_steps": 37885, "loss": 0.1248, "lr": 1.9742141493620876e-06, "epoch": 0.8261845057410585, "percentage": 16.52, "elapsed_time": "0:09:03", "remaining_time": "0:45:46", "throughput": 5659.7, "total_tokens": 3077376} +{"current_steps": 6265, "total_steps": 37885, "loss": 0.2122, "lr": 1.9741101008126655e-06, "epoch": 0.8268443975188069, "percentage": 16.54, "elapsed_time": "0:09:04", "remaining_time": "0:45:45", "throughput": 5660.71, "total_tokens": 3079808} +{"current_steps": 6270, "total_steps": 37885, "loss": 0.0026, "lr": 1.974005845515429e-06, "epoch": 0.8275042892965554, "percentage": 16.55, "elapsed_time": "0:09:04", "remaining_time": "0:45:45", "throughput": 5662.21, "total_tokens": 3082560} +{"current_steps": 6275, "total_steps": 37885, "loss": 0.1156, "lr": 1.9739013834925047e-06, "epoch": 0.8281641810743038, "percentage": 16.56, "elapsed_time": "0:09:04", "remaining_time": "0:45:44", "throughput": 5662.49, "total_tokens": 3084608} +{"current_steps": 6280, "total_steps": 37885, "loss": 0.3242, "lr": 1.973796714766064e-06, "epoch": 0.8288240728520523, "percentage": 16.58, "elapsed_time": "0:09:05", "remaining_time": "0:45:43", "throughput": 5663.55, "total_tokens": 3087104} +{"current_steps": 6285, "total_steps": 37885, "loss": 0.0915, "lr": 1.973691839358323e-06, "epoch": 0.8294839646298007, "percentage": 16.59, "elapsed_time": "0:09:05", "remaining_time": "0:45:42", "throughput": 5664.25, "total_tokens": 3089408} +{"current_steps": 6290, "total_steps": 37885, "loss": 0.054, "lr": 1.973586757291539e-06, "epoch": 0.8301438564075492, "percentage": 16.6, "elapsed_time": "0:09:05", "remaining_time": "0:45:41", "throughput": 5665.11, "total_tokens": 3091776} +{"current_steps": 6295, "total_steps": 37885, "loss": 0.1558, "lr": 1.973481468588017e-06, "epoch": 0.8308037481852976, "percentage": 16.62, "elapsed_time": "0:09:06", "remaining_time": "0:45:40", "throughput": 5666.1, "total_tokens": 3094208} +{"current_steps": 6300, "total_steps": 37885, "loss": 0.0414, "lr": 1.973375973270102e-06, "epoch": 0.8314636399630461, "percentage": 16.63, "elapsed_time": "0:09:06", "remaining_time": "0:45:39", "throughput": 5667.34, "total_tokens": 3096768} +{"current_steps": 6305, "total_steps": 37885, "loss": 0.0018, "lr": 1.973270271360185e-06, "epoch": 0.8321235317407946, "percentage": 16.64, "elapsed_time": "0:09:06", "remaining_time": "0:45:38", "throughput": 5668.74, "total_tokens": 3099456} +{"current_steps": 6310, "total_steps": 37885, "loss": 0.1685, "lr": 1.9731643628807014e-06, "epoch": 0.832783423518543, "percentage": 16.66, "elapsed_time": "0:09:07", "remaining_time": "0:45:37", "throughput": 5670.24, "total_tokens": 3102208} +{"current_steps": 6315, "total_steps": 37885, "loss": 0.0576, "lr": 1.973058247854129e-06, "epoch": 0.8334433152962915, "percentage": 16.67, "elapsed_time": "0:09:07", "remaining_time": "0:45:36", "throughput": 5671.63, "total_tokens": 3104896} +{"current_steps": 6320, "total_steps": 37885, "loss": 0.1591, "lr": 1.9729519263029895e-06, "epoch": 0.8341032070740398, "percentage": 16.68, "elapsed_time": "0:09:07", "remaining_time": "0:45:35", "throughput": 5672.98, "total_tokens": 3107520} +{"current_steps": 6325, "total_steps": 37885, "loss": 0.0462, "lr": 1.972845398249849e-06, "epoch": 0.8347630988517883, "percentage": 16.7, "elapsed_time": "0:09:08", "remaining_time": "0:45:34", "throughput": 5674.37, "total_tokens": 3110144} +{"current_steps": 6330, "total_steps": 37885, "loss": 0.0463, "lr": 1.972738663717318e-06, "epoch": 0.8354229906295367, "percentage": 16.71, "elapsed_time": "0:09:08", "remaining_time": "0:45:33", "throughput": 5675.74, "total_tokens": 3112768} +{"current_steps": 6335, "total_steps": 37885, "loss": 0.0003, "lr": 1.9726317227280494e-06, "epoch": 0.8360828824072852, "percentage": 16.72, "elapsed_time": "0:09:08", "remaining_time": "0:45:33", "throughput": 5676.97, "total_tokens": 3115328} +{"current_steps": 6340, "total_steps": 37885, "loss": 0.0213, "lr": 1.972524575304741e-06, "epoch": 0.8367427741850336, "percentage": 16.73, "elapsed_time": "0:09:09", "remaining_time": "0:45:32", "throughput": 5678.21, "total_tokens": 3117888} +{"current_steps": 6345, "total_steps": 37885, "loss": 0.0118, "lr": 1.972417221470134e-06, "epoch": 0.8374026659627821, "percentage": 16.75, "elapsed_time": "0:09:09", "remaining_time": "0:45:31", "throughput": 5679.33, "total_tokens": 3120384} +{"current_steps": 6350, "total_steps": 37885, "loss": 0.1584, "lr": 1.972309661247013e-06, "epoch": 0.8380625577405305, "percentage": 16.76, "elapsed_time": "0:09:09", "remaining_time": "0:45:30", "throughput": 5680.63, "total_tokens": 3123008} +{"current_steps": 6355, "total_steps": 37885, "loss": 0.0573, "lr": 1.9722018946582075e-06, "epoch": 0.838722449518279, "percentage": 16.77, "elapsed_time": "0:09:10", "remaining_time": "0:45:29", "throughput": 5681.76, "total_tokens": 3125504} +{"current_steps": 6360, "total_steps": 37885, "loss": 0.0681, "lr": 1.9720939217265904e-06, "epoch": 0.8393823412960274, "percentage": 16.79, "elapsed_time": "0:09:10", "remaining_time": "0:45:28", "throughput": 5682.47, "total_tokens": 3127744} +{"current_steps": 6365, "total_steps": 37885, "loss": 0.1754, "lr": 1.9719857424750776e-06, "epoch": 0.8400422330737759, "percentage": 16.8, "elapsed_time": "0:09:10", "remaining_time": "0:45:27", "throughput": 5683.26, "total_tokens": 3130048} +{"current_steps": 6370, "total_steps": 37885, "loss": 0.0619, "lr": 1.971877356926629e-06, "epoch": 0.8407021248515244, "percentage": 16.81, "elapsed_time": "0:09:11", "remaining_time": "0:45:26", "throughput": 5684.28, "total_tokens": 3132480} +{"current_steps": 6375, "total_steps": 37885, "loss": 0.136, "lr": 1.9717687651042494e-06, "epoch": 0.8413620166292728, "percentage": 16.83, "elapsed_time": "0:09:11", "remaining_time": "0:45:25", "throughput": 5685.65, "total_tokens": 3135104} +{"current_steps": 6380, "total_steps": 37885, "loss": 0.2398, "lr": 1.971659967030987e-06, "epoch": 0.8420219084070213, "percentage": 16.84, "elapsed_time": "0:09:11", "remaining_time": "0:45:24", "throughput": 5686.32, "total_tokens": 3137344} +{"current_steps": 6385, "total_steps": 37885, "loss": 0.2223, "lr": 1.9715509627299324e-06, "epoch": 0.8426818001847697, "percentage": 16.85, "elapsed_time": "0:09:12", "remaining_time": "0:45:23", "throughput": 5687.9, "total_tokens": 3140096} +{"current_steps": 6390, "total_steps": 37885, "loss": 0.1451, "lr": 1.971441752224221e-06, "epoch": 0.8433416919625182, "percentage": 16.87, "elapsed_time": "0:09:12", "remaining_time": "0:45:22", "throughput": 5688.69, "total_tokens": 3142400} +{"current_steps": 6395, "total_steps": 37885, "loss": 0.0571, "lr": 1.971332335537033e-06, "epoch": 0.8440015837402666, "percentage": 16.88, "elapsed_time": "0:09:12", "remaining_time": "0:45:21", "throughput": 5689.17, "total_tokens": 3144512} +{"current_steps": 6400, "total_steps": 37885, "loss": 0.1166, "lr": 1.97122271269159e-06, "epoch": 0.8446614755180151, "percentage": 16.89, "elapsed_time": "0:09:13", "remaining_time": "0:45:20", "throughput": 5690.21, "total_tokens": 3146944} +{"current_steps": 6405, "total_steps": 37885, "loss": 0.1062, "lr": 1.97111288371116e-06, "epoch": 0.8453213672957635, "percentage": 16.91, "elapsed_time": "0:09:13", "remaining_time": "0:45:19", "throughput": 5691.27, "total_tokens": 3149376} +{"current_steps": 6410, "total_steps": 37885, "loss": 0.1249, "lr": 1.9710028486190524e-06, "epoch": 0.845981259073512, "percentage": 16.92, "elapsed_time": "0:09:13", "remaining_time": "0:45:18", "throughput": 5692.17, "total_tokens": 3151744} +{"current_steps": 6415, "total_steps": 37885, "loss": 0.039, "lr": 1.970892607438621e-06, "epoch": 0.8466411508512603, "percentage": 16.93, "elapsed_time": "0:09:14", "remaining_time": "0:45:17", "throughput": 5693.07, "total_tokens": 3154112} +{"current_steps": 6420, "total_steps": 37885, "loss": 0.0129, "lr": 1.970782160193265e-06, "epoch": 0.8473010426290088, "percentage": 16.95, "elapsed_time": "0:09:14", "remaining_time": "0:45:16", "throughput": 5693.98, "total_tokens": 3156480} +{"current_steps": 6425, "total_steps": 37885, "loss": 0.2154, "lr": 1.970671506906425e-06, "epoch": 0.8479609344067572, "percentage": 16.96, "elapsed_time": "0:09:14", "remaining_time": "0:45:15", "throughput": 5694.77, "total_tokens": 3158784} +{"current_steps": 6430, "total_steps": 37885, "loss": 0.1681, "lr": 1.970560647601587e-06, "epoch": 0.8486208261845057, "percentage": 16.97, "elapsed_time": "0:09:15", "remaining_time": "0:45:15", "throughput": 5695.66, "total_tokens": 3161152} +{"current_steps": 6435, "total_steps": 37885, "loss": 0.0015, "lr": 1.9704495823022797e-06, "epoch": 0.8492807179622542, "percentage": 16.99, "elapsed_time": "0:09:15", "remaining_time": "0:45:14", "throughput": 5696.99, "total_tokens": 3163776} +{"current_steps": 6440, "total_steps": 37885, "loss": 0.1335, "lr": 1.970338311032076e-06, "epoch": 0.8499406097400026, "percentage": 17.0, "elapsed_time": "0:09:15", "remaining_time": "0:45:13", "throughput": 5698.12, "total_tokens": 3166272} +{"current_steps": 6445, "total_steps": 37885, "loss": 0.1466, "lr": 1.970226833814592e-06, "epoch": 0.8506005015177511, "percentage": 17.01, "elapsed_time": "0:09:16", "remaining_time": "0:45:12", "throughput": 5698.98, "total_tokens": 3168640} +{"current_steps": 6450, "total_steps": 37885, "loss": 0.072, "lr": 1.970115150673489e-06, "epoch": 0.8512603932954995, "percentage": 17.03, "elapsed_time": "0:09:16", "remaining_time": "0:45:11", "throughput": 5699.91, "total_tokens": 3171008} +{"current_steps": 6455, "total_steps": 37885, "loss": 0.0785, "lr": 1.97000326163247e-06, "epoch": 0.851920285073248, "percentage": 17.04, "elapsed_time": "0:09:16", "remaining_time": "0:45:10", "throughput": 5700.71, "total_tokens": 3173312} +{"current_steps": 6460, "total_steps": 37885, "loss": 0.1788, "lr": 1.969891166715283e-06, "epoch": 0.8525801768509964, "percentage": 17.05, "elapsed_time": "0:09:16", "remaining_time": "0:45:09", "throughput": 5701.81, "total_tokens": 3175808} +{"current_steps": 6465, "total_steps": 37885, "loss": 0.1182, "lr": 1.969778865945719e-06, "epoch": 0.8532400686287449, "percentage": 17.06, "elapsed_time": "0:09:17", "remaining_time": "0:45:08", "throughput": 5702.47, "total_tokens": 3178048} +{"current_steps": 6470, "total_steps": 37885, "loss": 0.0031, "lr": 1.969666359347614e-06, "epoch": 0.8538999604064933, "percentage": 17.08, "elapsed_time": "0:09:17", "remaining_time": "0:45:07", "throughput": 5703.56, "total_tokens": 3180544} +{"current_steps": 6475, "total_steps": 37885, "loss": 0.268, "lr": 1.969553646944845e-06, "epoch": 0.8545598521842418, "percentage": 17.09, "elapsed_time": "0:09:17", "remaining_time": "0:45:06", "throughput": 5704.65, "total_tokens": 3183040} +{"current_steps": 6480, "total_steps": 37885, "loss": 0.0905, "lr": 1.969440728761336e-06, "epoch": 0.8552197439619902, "percentage": 17.1, "elapsed_time": "0:09:18", "remaining_time": "0:45:05", "throughput": 5705.96, "total_tokens": 3185664} +{"current_steps": 6485, "total_steps": 37885, "loss": 0.1175, "lr": 1.9693276048210524e-06, "epoch": 0.8558796357397387, "percentage": 17.12, "elapsed_time": "0:09:18", "remaining_time": "0:45:04", "throughput": 5707.86, "total_tokens": 3188672} +{"current_steps": 6490, "total_steps": 37885, "loss": 0.0078, "lr": 1.969214275148004e-06, "epoch": 0.8565395275174872, "percentage": 17.13, "elapsed_time": "0:09:18", "remaining_time": "0:45:04", "throughput": 5708.98, "total_tokens": 3191168} +{"current_steps": 6495, "total_steps": 37885, "loss": 0.2481, "lr": 1.9691007397662444e-06, "epoch": 0.8571994192952356, "percentage": 17.14, "elapsed_time": "0:09:19", "remaining_time": "0:45:03", "throughput": 5710.1, "total_tokens": 3193664} +{"current_steps": 6500, "total_steps": 37885, "loss": 0.0524, "lr": 1.96898699869987e-06, "epoch": 0.8578593110729841, "percentage": 17.16, "elapsed_time": "0:09:19", "remaining_time": "0:45:02", "throughput": 5711.3, "total_tokens": 3196224} +{"current_steps": 6505, "total_steps": 37885, "loss": 0.1735, "lr": 1.968873051973022e-06, "epoch": 0.8585192028507325, "percentage": 17.17, "elapsed_time": "0:09:19", "remaining_time": "0:45:01", "throughput": 5712.49, "total_tokens": 3198784} +{"current_steps": 6510, "total_steps": 37885, "loss": 0.1, "lr": 1.968758899609885e-06, "epoch": 0.859179094628481, "percentage": 17.18, "elapsed_time": "0:09:20", "remaining_time": "0:45:00", "throughput": 5713.91, "total_tokens": 3201472} +{"current_steps": 6515, "total_steps": 37885, "loss": 0.0387, "lr": 1.9686445416346866e-06, "epoch": 0.8598389864062294, "percentage": 17.2, "elapsed_time": "0:09:20", "remaining_time": "0:44:59", "throughput": 5714.36, "total_tokens": 3203584} +{"current_steps": 6520, "total_steps": 37885, "loss": 0.16, "lr": 1.9685299780716988e-06, "epoch": 0.8604988781839779, "percentage": 17.21, "elapsed_time": "0:09:20", "remaining_time": "0:44:58", "throughput": 5715.14, "total_tokens": 3205888} +{"current_steps": 6525, "total_steps": 37885, "loss": 0.2118, "lr": 1.968415208945237e-06, "epoch": 0.8611587699617262, "percentage": 17.22, "elapsed_time": "0:09:21", "remaining_time": "0:44:57", "throughput": 5715.58, "total_tokens": 3208000} +{"current_steps": 6530, "total_steps": 37885, "loss": 0.0009, "lr": 1.9683002342796594e-06, "epoch": 0.8618186617394747, "percentage": 17.24, "elapsed_time": "0:09:21", "remaining_time": "0:44:56", "throughput": 5716.25, "total_tokens": 3210240} +{"current_steps": 6535, "total_steps": 37885, "loss": 0.0847, "lr": 1.9681850540993687e-06, "epoch": 0.8624785535172231, "percentage": 17.25, "elapsed_time": "0:09:21", "remaining_time": "0:44:55", "throughput": 5717.22, "total_tokens": 3212672} +{"current_steps": 6540, "total_steps": 37885, "loss": 0.2278, "lr": 1.9680696684288116e-06, "epoch": 0.8631384452949716, "percentage": 17.26, "elapsed_time": "0:09:22", "remaining_time": "0:44:54", "throughput": 5718.6, "total_tokens": 3215360} +{"current_steps": 6545, "total_steps": 37885, "loss": 0.1291, "lr": 1.9679540772924773e-06, "epoch": 0.86379833707272, "percentage": 17.28, "elapsed_time": "0:09:22", "remaining_time": "0:44:53", "throughput": 5720.09, "total_tokens": 3218112} +{"current_steps": 6550, "total_steps": 37885, "loss": 0.0677, "lr": 1.9678382807149e-06, "epoch": 0.8644582288504685, "percentage": 17.29, "elapsed_time": "0:09:22", "remaining_time": "0:44:53", "throughput": 5720.63, "total_tokens": 3220288} +{"current_steps": 6555, "total_steps": 37885, "loss": 0.0061, "lr": 1.967722278720656e-06, "epoch": 0.865118120628217, "percentage": 17.3, "elapsed_time": "0:09:23", "remaining_time": "0:44:52", "throughput": 5722.01, "total_tokens": 3222976} +{"current_steps": 6560, "total_steps": 37885, "loss": 0.087, "lr": 1.967606071334366e-06, "epoch": 0.8657780124059654, "percentage": 17.32, "elapsed_time": "0:09:23", "remaining_time": "0:44:51", "throughput": 5723.1, "total_tokens": 3225472} +{"current_steps": 6565, "total_steps": 37885, "loss": 0.2098, "lr": 1.9674896585806938e-06, "epoch": 0.8664379041837139, "percentage": 17.33, "elapsed_time": "0:09:23", "remaining_time": "0:44:50", "throughput": 5724.41, "total_tokens": 3228096} +{"current_steps": 6570, "total_steps": 37885, "loss": 0.1914, "lr": 1.967373040484348e-06, "epoch": 0.8670977959614623, "percentage": 17.34, "elapsed_time": "0:09:24", "remaining_time": "0:44:49", "throughput": 5725.71, "total_tokens": 3230720} +{"current_steps": 6575, "total_steps": 37885, "loss": 0.1312, "lr": 1.9672562170700794e-06, "epoch": 0.8677576877392108, "percentage": 17.36, "elapsed_time": "0:09:24", "remaining_time": "0:44:48", "throughput": 5726.58, "total_tokens": 3233088} +{"current_steps": 6580, "total_steps": 37885, "loss": 0.139, "lr": 1.967139188362683e-06, "epoch": 0.8684175795169592, "percentage": 17.37, "elapsed_time": "0:09:24", "remaining_time": "0:44:47", "throughput": 5727.89, "total_tokens": 3235712} +{"current_steps": 6585, "total_steps": 37885, "loss": 0.1531, "lr": 1.9670219543869977e-06, "epoch": 0.8690774712947077, "percentage": 17.38, "elapsed_time": "0:09:25", "remaining_time": "0:44:46", "throughput": 5729.48, "total_tokens": 3238528} +{"current_steps": 6590, "total_steps": 37885, "loss": 0.1389, "lr": 1.9669045151679045e-06, "epoch": 0.8697373630724561, "percentage": 17.39, "elapsed_time": "0:09:25", "remaining_time": "0:44:45", "throughput": 5730.35, "total_tokens": 3240896} +{"current_steps": 6595, "total_steps": 37885, "loss": 0.0033, "lr": 1.9667868707303304e-06, "epoch": 0.8703972548502046, "percentage": 17.41, "elapsed_time": "0:09:25", "remaining_time": "0:44:44", "throughput": 5731.41, "total_tokens": 3243392} +{"current_steps": 6600, "total_steps": 37885, "loss": 0.0405, "lr": 1.966669021099244e-06, "epoch": 0.871057146627953, "percentage": 17.42, "elapsed_time": "0:09:26", "remaining_time": "0:44:43", "throughput": 5732.39, "total_tokens": 3245824} +{"current_steps": 6605, "total_steps": 37885, "loss": 0.002, "lr": 1.966550966299657e-06, "epoch": 0.8717170384057015, "percentage": 17.43, "elapsed_time": "0:09:26", "remaining_time": "0:44:43", "throughput": 5733.14, "total_tokens": 3248128} +{"current_steps": 6610, "total_steps": 37885, "loss": 0.2562, "lr": 1.9664327063566273e-06, "epoch": 0.8723769301834499, "percentage": 17.45, "elapsed_time": "0:09:26", "remaining_time": "0:44:42", "throughput": 5734.23, "total_tokens": 3250624} +{"current_steps": 6615, "total_steps": 37885, "loss": 0.1405, "lr": 1.966314241295254e-06, "epoch": 0.8730368219611984, "percentage": 17.46, "elapsed_time": "0:09:27", "remaining_time": "0:44:41", "throughput": 5735.62, "total_tokens": 3253312} +{"current_steps": 6620, "total_steps": 37885, "loss": 0.1581, "lr": 1.9661955711406808e-06, "epoch": 0.8736967137389469, "percentage": 17.47, "elapsed_time": "0:09:27", "remaining_time": "0:44:40", "throughput": 5736.18, "total_tokens": 3255488} +{"current_steps": 6625, "total_steps": 37885, "loss": 0.0712, "lr": 1.966076695918094e-06, "epoch": 0.8743566055166953, "percentage": 17.49, "elapsed_time": "0:09:27", "remaining_time": "0:44:39", "throughput": 5736.74, "total_tokens": 3257664} +{"current_steps": 6630, "total_steps": 37885, "loss": 0.0422, "lr": 1.9659576156527236e-06, "epoch": 0.8750164972944438, "percentage": 17.5, "elapsed_time": "0:09:28", "remaining_time": "0:44:38", "throughput": 5737.79, "total_tokens": 3260160} +{"current_steps": 6635, "total_steps": 37885, "loss": 0.15, "lr": 1.965838330369845e-06, "epoch": 0.8756763890721921, "percentage": 17.51, "elapsed_time": "0:09:28", "remaining_time": "0:44:37", "throughput": 5738.64, "total_tokens": 3262528} +{"current_steps": 6640, "total_steps": 37885, "loss": 0.099, "lr": 1.9657188400947748e-06, "epoch": 0.8763362808499406, "percentage": 17.53, "elapsed_time": "0:09:28", "remaining_time": "0:44:36", "throughput": 5739.72, "total_tokens": 3265024} +{"current_steps": 6645, "total_steps": 37885, "loss": 0.2838, "lr": 1.965599144852874e-06, "epoch": 0.876996172627689, "percentage": 17.54, "elapsed_time": "0:09:29", "remaining_time": "0:44:35", "throughput": 5740.67, "total_tokens": 3267456} +{"current_steps": 6650, "total_steps": 37885, "loss": 0.0717, "lr": 1.9654792446695467e-06, "epoch": 0.8776560644054375, "percentage": 17.55, "elapsed_time": "0:09:29", "remaining_time": "0:44:34", "throughput": 5742.14, "total_tokens": 3270208} +{"current_steps": 6655, "total_steps": 37885, "loss": 0.1191, "lr": 1.9653591395702408e-06, "epoch": 0.8783159561831859, "percentage": 17.57, "elapsed_time": "0:09:29", "remaining_time": "0:44:34", "throughput": 5743.62, "total_tokens": 3272960} +{"current_steps": 6660, "total_steps": 37885, "loss": 0.1331, "lr": 1.9652388295804484e-06, "epoch": 0.8789758479609344, "percentage": 17.58, "elapsed_time": "0:09:30", "remaining_time": "0:44:33", "throughput": 5744.17, "total_tokens": 3275136} +{"current_steps": 6665, "total_steps": 37885, "loss": 0.2028, "lr": 1.9651183147257046e-06, "epoch": 0.8796357397386828, "percentage": 17.59, "elapsed_time": "0:09:30", "remaining_time": "0:44:32", "throughput": 5745.36, "total_tokens": 3277696} +{"current_steps": 6670, "total_steps": 37885, "loss": 0.1612, "lr": 1.964997595031587e-06, "epoch": 0.8802956315164313, "percentage": 17.61, "elapsed_time": "0:09:30", "remaining_time": "0:44:31", "throughput": 5746.21, "total_tokens": 3280064} +{"current_steps": 6675, "total_steps": 37885, "loss": 0.185, "lr": 1.964876670523718e-06, "epoch": 0.8809555232941798, "percentage": 17.62, "elapsed_time": "0:09:31", "remaining_time": "0:44:30", "throughput": 5746.84, "total_tokens": 3282304} +{"current_steps": 6680, "total_steps": 37885, "loss": 0.102, "lr": 1.9647555412277623e-06, "epoch": 0.8816154150719282, "percentage": 17.63, "elapsed_time": "0:09:31", "remaining_time": "0:44:29", "throughput": 5747.78, "total_tokens": 3284736} +{"current_steps": 6685, "total_steps": 37885, "loss": 0.0322, "lr": 1.9646342071694298e-06, "epoch": 0.8822753068496767, "percentage": 17.65, "elapsed_time": "0:09:31", "remaining_time": "0:44:28", "throughput": 5748.73, "total_tokens": 3287168} +{"current_steps": 6690, "total_steps": 37885, "loss": 0.1026, "lr": 1.9645126683744718e-06, "epoch": 0.8829351986274251, "percentage": 17.66, "elapsed_time": "0:09:32", "remaining_time": "0:44:27", "throughput": 5749.69, "total_tokens": 3289600} +{"current_steps": 6695, "total_steps": 37885, "loss": 0.0023, "lr": 1.9643909248686847e-06, "epoch": 0.8835950904051736, "percentage": 17.67, "elapsed_time": "0:09:32", "remaining_time": "0:44:26", "throughput": 5750.87, "total_tokens": 3292160} +{"current_steps": 6700, "total_steps": 37885, "loss": 0.1903, "lr": 1.964268976677907e-06, "epoch": 0.884254982182922, "percentage": 17.69, "elapsed_time": "0:09:32", "remaining_time": "0:44:26", "throughput": 5751.82, "total_tokens": 3294592} +{"current_steps": 6705, "total_steps": 37885, "loss": 0.0425, "lr": 1.964146823828022e-06, "epoch": 0.8849148739606705, "percentage": 17.7, "elapsed_time": "0:09:33", "remaining_time": "0:44:25", "throughput": 5752.66, "total_tokens": 3296960} +{"current_steps": 6710, "total_steps": 37885, "loss": 0.035, "lr": 1.9640244663449548e-06, "epoch": 0.8855747657384189, "percentage": 17.71, "elapsed_time": "0:09:33", "remaining_time": "0:44:24", "throughput": 5753.32, "total_tokens": 3299200} +{"current_steps": 6715, "total_steps": 37885, "loss": 0.2501, "lr": 1.963901904254676e-06, "epoch": 0.8862346575161674, "percentage": 17.72, "elapsed_time": "0:09:33", "remaining_time": "0:44:23", "throughput": 5754.19, "total_tokens": 3301568} +{"current_steps": 6720, "total_steps": 37885, "loss": 0.1129, "lr": 1.963779137583198e-06, "epoch": 0.8868945492939158, "percentage": 17.74, "elapsed_time": "0:09:34", "remaining_time": "0:44:22", "throughput": 5755.23, "total_tokens": 3304064} +{"current_steps": 6725, "total_steps": 37885, "loss": 0.1272, "lr": 1.963656166356577e-06, "epoch": 0.8875544410716643, "percentage": 17.75, "elapsed_time": "0:09:34", "remaining_time": "0:44:21", "throughput": 5756.09, "total_tokens": 3306432} +{"current_steps": 6730, "total_steps": 37885, "loss": 0.1033, "lr": 1.9635329906009135e-06, "epoch": 0.8882143328494126, "percentage": 17.76, "elapsed_time": "0:09:34", "remaining_time": "0:44:20", "throughput": 5756.83, "total_tokens": 3308736} +{"current_steps": 6735, "total_steps": 37885, "loss": 0.0311, "lr": 1.96340961034235e-06, "epoch": 0.8888742246271611, "percentage": 17.78, "elapsed_time": "0:09:35", "remaining_time": "0:44:19", "throughput": 5757.77, "total_tokens": 3311168} +{"current_steps": 6740, "total_steps": 37885, "loss": 0.1654, "lr": 1.9632860256070727e-06, "epoch": 0.8895341164049096, "percentage": 17.79, "elapsed_time": "0:09:35", "remaining_time": "0:44:18", "throughput": 5758.79, "total_tokens": 3313664} +{"current_steps": 6745, "total_steps": 37885, "loss": 0.1481, "lr": 1.9631622364213124e-06, "epoch": 0.890194008182658, "percentage": 17.8, "elapsed_time": "0:09:35", "remaining_time": "0:44:18", "throughput": 5759.94, "total_tokens": 3316224} +{"current_steps": 6750, "total_steps": 37885, "loss": 0.0998, "lr": 1.9630382428113416e-06, "epoch": 0.8908538999604065, "percentage": 17.82, "elapsed_time": "0:09:36", "remaining_time": "0:44:17", "throughput": 5760.55, "total_tokens": 3318464} +{"current_steps": 6755, "total_steps": 37885, "loss": 0.0018, "lr": 1.962914044803478e-06, "epoch": 0.8915137917381549, "percentage": 17.83, "elapsed_time": "0:09:36", "remaining_time": "0:44:16", "throughput": 5761.46, "total_tokens": 3320896} +{"current_steps": 6760, "total_steps": 37885, "loss": 0.1516, "lr": 1.9627896424240814e-06, "epoch": 0.8921736835159034, "percentage": 17.84, "elapsed_time": "0:09:36", "remaining_time": "0:44:15", "throughput": 5762.87, "total_tokens": 3323648} +{"current_steps": 6765, "total_steps": 37885, "loss": 0.2309, "lr": 1.9626650356995545e-06, "epoch": 0.8928335752936518, "percentage": 17.86, "elapsed_time": "0:09:37", "remaining_time": "0:44:14", "throughput": 5764.0, "total_tokens": 3326208} +{"current_steps": 6770, "total_steps": 37885, "loss": 0.1373, "lr": 1.9625402246563456e-06, "epoch": 0.8934934670714003, "percentage": 17.87, "elapsed_time": "0:09:37", "remaining_time": "0:44:13", "throughput": 5764.86, "total_tokens": 3328576} +{"current_steps": 6775, "total_steps": 37885, "loss": 0.0354, "lr": 1.962415209320944e-06, "epoch": 0.8941533588491487, "percentage": 17.88, "elapsed_time": "0:09:37", "remaining_time": "0:44:12", "throughput": 5766.55, "total_tokens": 3331520} +{"current_steps": 6780, "total_steps": 37885, "loss": 0.0489, "lr": 1.9622899897198834e-06, "epoch": 0.8948132506268972, "percentage": 17.9, "elapsed_time": "0:09:38", "remaining_time": "0:44:12", "throughput": 5768.08, "total_tokens": 3334336} +{"current_steps": 6785, "total_steps": 37885, "loss": 0.1136, "lr": 1.962164565879741e-06, "epoch": 0.8954731424046456, "percentage": 17.91, "elapsed_time": "0:09:38", "remaining_time": "0:44:11", "throughput": 5769.22, "total_tokens": 3336896} +{"current_steps": 6790, "total_steps": 37885, "loss": 0.1573, "lr": 1.9620389378271363e-06, "epoch": 0.8961330341823941, "percentage": 17.92, "elapsed_time": "0:09:38", "remaining_time": "0:44:10", "throughput": 5770.1, "total_tokens": 3339328} +{"current_steps": 6795, "total_steps": 37885, "loss": 0.0079, "lr": 1.9619131055887343e-06, "epoch": 0.8967929259601425, "percentage": 17.94, "elapsed_time": "0:09:39", "remaining_time": "0:44:09", "throughput": 5771.05, "total_tokens": 3341760} +{"current_steps": 6800, "total_steps": 37885, "loss": 0.1041, "lr": 1.961787069191241e-06, "epoch": 0.897452817737891, "percentage": 17.95, "elapsed_time": "0:09:39", "remaining_time": "0:44:08", "throughput": 5772.36, "total_tokens": 3344448} +{"current_steps": 6805, "total_steps": 37885, "loss": 0.0233, "lr": 1.9616608286614065e-06, "epoch": 0.8981127095156395, "percentage": 17.96, "elapsed_time": "0:09:39", "remaining_time": "0:44:07", "throughput": 5773.5, "total_tokens": 3347008} +{"current_steps": 6810, "total_steps": 37885, "loss": 0.0408, "lr": 1.9615343840260255e-06, "epoch": 0.8987726012933879, "percentage": 17.98, "elapsed_time": "0:09:40", "remaining_time": "0:44:06", "throughput": 5775.0, "total_tokens": 3349824} +{"current_steps": 6815, "total_steps": 37885, "loss": 0.0705, "lr": 1.9614077353119345e-06, "epoch": 0.8994324930711364, "percentage": 17.99, "elapsed_time": "0:09:40", "remaining_time": "0:44:06", "throughput": 5776.0, "total_tokens": 3352320} +{"current_steps": 6820, "total_steps": 37885, "loss": 0.0009, "lr": 1.961280882546013e-06, "epoch": 0.9000923848488848, "percentage": 18.0, "elapsed_time": "0:09:40", "remaining_time": "0:44:05", "throughput": 5776.78, "total_tokens": 3354688} +{"current_steps": 6825, "total_steps": 37885, "loss": 0.0692, "lr": 1.961153825755186e-06, "epoch": 0.9007522766266333, "percentage": 18.02, "elapsed_time": "0:09:41", "remaining_time": "0:44:04", "throughput": 5777.56, "total_tokens": 3357056} +{"current_steps": 6830, "total_steps": 37885, "loss": 0.0761, "lr": 1.961026564966419e-06, "epoch": 0.9014121684043817, "percentage": 18.03, "elapsed_time": "0:09:41", "remaining_time": "0:44:03", "throughput": 5778.48, "total_tokens": 3359488} +{"current_steps": 6835, "total_steps": 37885, "loss": 0.3297, "lr": 1.9608991002067233e-06, "epoch": 0.9020720601821302, "percentage": 18.04, "elapsed_time": "0:09:41", "remaining_time": "0:44:02", "throughput": 5779.4, "total_tokens": 3361920} +{"current_steps": 6840, "total_steps": 37885, "loss": 0.0016, "lr": 1.9607714315031513e-06, "epoch": 0.9027319519598785, "percentage": 18.05, "elapsed_time": "0:09:42", "remaining_time": "0:44:01", "throughput": 5780.45, "total_tokens": 3364416} +{"current_steps": 6845, "total_steps": 37885, "loss": 0.1103, "lr": 1.9606435588828008e-06, "epoch": 0.903391843737627, "percentage": 18.07, "elapsed_time": "0:09:42", "remaining_time": "0:44:00", "throughput": 5781.5, "total_tokens": 3366912} +{"current_steps": 6850, "total_steps": 37885, "loss": 0.0008, "lr": 1.960515482372811e-06, "epoch": 0.9040517355153754, "percentage": 18.08, "elapsed_time": "0:09:42", "remaining_time": "0:43:59", "throughput": 5782.01, "total_tokens": 3369088} +{"current_steps": 6855, "total_steps": 37885, "loss": 0.2938, "lr": 1.960387202000366e-06, "epoch": 0.9047116272931239, "percentage": 18.09, "elapsed_time": "0:09:43", "remaining_time": "0:43:59", "throughput": 5782.93, "total_tokens": 3371520} +{"current_steps": 6860, "total_steps": 37885, "loss": 0.0004, "lr": 1.9602587177926913e-06, "epoch": 0.9053715190708723, "percentage": 18.11, "elapsed_time": "0:09:43", "remaining_time": "0:43:58", "throughput": 5784.04, "total_tokens": 3374080} +{"current_steps": 6865, "total_steps": 37885, "loss": 0.0758, "lr": 1.960130029777058e-06, "epoch": 0.9060314108486208, "percentage": 18.12, "elapsed_time": "0:09:43", "remaining_time": "0:43:57", "throughput": 5785.14, "total_tokens": 3376640} +{"current_steps": 6870, "total_steps": 37885, "loss": 0.0005, "lr": 1.9600011379807783e-06, "epoch": 0.9066913026263693, "percentage": 18.13, "elapsed_time": "0:09:44", "remaining_time": "0:43:56", "throughput": 5786.05, "total_tokens": 3379072} +{"current_steps": 6875, "total_steps": 37885, "loss": 0.05, "lr": 1.9598720424312093e-06, "epoch": 0.9073511944041177, "percentage": 18.15, "elapsed_time": "0:09:44", "remaining_time": "0:43:55", "throughput": 5787.27, "total_tokens": 3381696} +{"current_steps": 6880, "total_steps": 37885, "loss": 0.317, "lr": 1.9597427431557497e-06, "epoch": 0.9080110861818662, "percentage": 18.16, "elapsed_time": "0:09:44", "remaining_time": "0:43:54", "throughput": 5788.1, "total_tokens": 3384064} +{"current_steps": 6885, "total_steps": 37885, "loss": 0.1413, "lr": 1.9596132401818427e-06, "epoch": 0.9086709779596146, "percentage": 18.17, "elapsed_time": "0:09:44", "remaining_time": "0:43:53", "throughput": 5788.67, "total_tokens": 3386304} +{"current_steps": 6890, "total_steps": 37885, "loss": 0.078, "lr": 1.9594835335369748e-06, "epoch": 0.9093308697373631, "percentage": 18.19, "elapsed_time": "0:09:45", "remaining_time": "0:43:53", "throughput": 5789.68, "total_tokens": 3388800} +{"current_steps": 6895, "total_steps": 37885, "loss": 0.1664, "lr": 1.9593536232486747e-06, "epoch": 0.9099907615151115, "percentage": 18.2, "elapsed_time": "0:09:45", "remaining_time": "0:43:52", "throughput": 5790.56, "total_tokens": 3391232} +{"current_steps": 6900, "total_steps": 37885, "loss": 0.0852, "lr": 1.9592235093445153e-06, "epoch": 0.91065065329286, "percentage": 18.21, "elapsed_time": "0:09:45", "remaining_time": "0:43:51", "throughput": 5791.45, "total_tokens": 3393664} +{"current_steps": 6905, "total_steps": 37885, "loss": 0.1319, "lr": 1.959093191852112e-06, "epoch": 0.9113105450706084, "percentage": 18.23, "elapsed_time": "0:09:46", "remaining_time": "0:43:50", "throughput": 5792.12, "total_tokens": 3395968} +{"current_steps": 6910, "total_steps": 37885, "loss": 0.1763, "lr": 1.958962670799124e-06, "epoch": 0.9119704368483569, "percentage": 18.24, "elapsed_time": "0:09:46", "remaining_time": "0:43:49", "throughput": 5792.83, "total_tokens": 3398272} +{"current_steps": 6915, "total_steps": 37885, "loss": 0.2054, "lr": 1.9588319462132535e-06, "epoch": 0.9126303286261053, "percentage": 18.25, "elapsed_time": "0:09:46", "remaining_time": "0:43:48", "throughput": 5794.12, "total_tokens": 3400960} +{"current_steps": 6920, "total_steps": 37885, "loss": 0.2306, "lr": 1.9587010181222456e-06, "epoch": 0.9132902204038538, "percentage": 18.27, "elapsed_time": "0:09:47", "remaining_time": "0:43:47", "throughput": 5795.21, "total_tokens": 3403520} +{"current_steps": 6925, "total_steps": 37885, "loss": 0.2867, "lr": 1.9585698865538892e-06, "epoch": 0.9139501121816023, "percentage": 18.28, "elapsed_time": "0:09:47", "remaining_time": "0:43:47", "throughput": 5796.12, "total_tokens": 3405952} +{"current_steps": 6930, "total_steps": 37885, "loss": 0.1133, "lr": 1.9584385515360155e-06, "epoch": 0.9146100039593507, "percentage": 18.29, "elapsed_time": "0:09:47", "remaining_time": "0:43:46", "throughput": 5796.94, "total_tokens": 3408320} +{"current_steps": 6935, "total_steps": 37885, "loss": 0.0866, "lr": 1.9583070130965e-06, "epoch": 0.9152698957370992, "percentage": 18.31, "elapsed_time": "0:09:48", "remaining_time": "0:43:45", "throughput": 5797.99, "total_tokens": 3410880} +{"current_steps": 6940, "total_steps": 37885, "loss": 0.0082, "lr": 1.95817527126326e-06, "epoch": 0.9159297875148475, "percentage": 18.32, "elapsed_time": "0:09:48", "remaining_time": "0:43:44", "throughput": 5799.1, "total_tokens": 3413440} +{"current_steps": 6945, "total_steps": 37885, "loss": 0.1116, "lr": 1.9580433260642576e-06, "epoch": 0.916589679292596, "percentage": 18.33, "elapsed_time": "0:09:48", "remaining_time": "0:43:43", "throughput": 5800.19, "total_tokens": 3416000} +{"current_steps": 6950, "total_steps": 37885, "loss": 0.1138, "lr": 1.9579111775274967e-06, "epoch": 0.9172495710703444, "percentage": 18.34, "elapsed_time": "0:09:49", "remaining_time": "0:43:42", "throughput": 5800.79, "total_tokens": 3418240} +{"current_steps": 6955, "total_steps": 37885, "loss": 0.1691, "lr": 1.957778825681025e-06, "epoch": 0.9179094628480929, "percentage": 18.36, "elapsed_time": "0:09:49", "remaining_time": "0:43:42", "throughput": 5801.7, "total_tokens": 3420672} +{"current_steps": 6960, "total_steps": 37885, "loss": 0.0336, "lr": 1.9576462705529334e-06, "epoch": 0.9185693546258413, "percentage": 18.37, "elapsed_time": "0:09:49", "remaining_time": "0:43:41", "throughput": 5802.3, "total_tokens": 3422912} +{"current_steps": 6965, "total_steps": 37885, "loss": 0.0039, "lr": 1.9575135121713554e-06, "epoch": 0.9192292464035898, "percentage": 18.38, "elapsed_time": "0:09:50", "remaining_time": "0:43:40", "throughput": 5803.27, "total_tokens": 3425408} +{"current_steps": 6970, "total_steps": 37885, "loss": 0.0885, "lr": 1.9573805505644687e-06, "epoch": 0.9198891381813382, "percentage": 18.4, "elapsed_time": "0:09:50", "remaining_time": "0:43:39", "throughput": 5804.06, "total_tokens": 3427776} +{"current_steps": 6975, "total_steps": 37885, "loss": 0.1885, "lr": 1.9572473857604924e-06, "epoch": 0.9205490299590867, "percentage": 18.41, "elapsed_time": "0:09:50", "remaining_time": "0:43:38", "throughput": 5805.16, "total_tokens": 3430336} +{"current_steps": 6980, "total_steps": 37885, "loss": 0.2446, "lr": 1.9571140177876904e-06, "epoch": 0.9212089217368351, "percentage": 18.42, "elapsed_time": "0:09:51", "remaining_time": "0:43:37", "throughput": 5806.24, "total_tokens": 3432896} +{"current_steps": 6985, "total_steps": 37885, "loss": 0.0608, "lr": 1.956980446674369e-06, "epoch": 0.9218688135145836, "percentage": 18.44, "elapsed_time": "0:09:51", "remaining_time": "0:43:36", "throughput": 5806.85, "total_tokens": 3435136} +{"current_steps": 6990, "total_steps": 37885, "loss": 0.0706, "lr": 1.9568466724488783e-06, "epoch": 0.9225287052923321, "percentage": 18.45, "elapsed_time": "0:09:51", "remaining_time": "0:43:36", "throughput": 5808.14, "total_tokens": 3437824} +{"current_steps": 6995, "total_steps": 37885, "loss": 0.0195, "lr": 1.95671269513961e-06, "epoch": 0.9231885970700805, "percentage": 18.46, "elapsed_time": "0:09:52", "remaining_time": "0:43:35", "throughput": 5809.14, "total_tokens": 3440320} +{"current_steps": 7000, "total_steps": 37885, "loss": 0.1083, "lr": 1.9565785147749994e-06, "epoch": 0.923848488847829, "percentage": 18.48, "elapsed_time": "0:09:52", "remaining_time": "0:43:34", "throughput": 5810.19, "total_tokens": 3442880} +{"current_steps": 7005, "total_steps": 37885, "loss": 0.0438, "lr": 1.956444131383527e-06, "epoch": 0.9245083806255774, "percentage": 18.49, "elapsed_time": "0:09:52", "remaining_time": "0:43:33", "throughput": 5810.77, "total_tokens": 3445120} +{"current_steps": 7010, "total_steps": 37885, "loss": 0.1449, "lr": 1.9563095449937133e-06, "epoch": 0.9251682724033259, "percentage": 18.5, "elapsed_time": "0:09:53", "remaining_time": "0:43:32", "throughput": 5811.44, "total_tokens": 3447424} +{"current_steps": 7015, "total_steps": 37885, "loss": 0.0746, "lr": 1.9561747556341236e-06, "epoch": 0.9258281641810743, "percentage": 18.52, "elapsed_time": "0:09:53", "remaining_time": "0:43:31", "throughput": 5812.44, "total_tokens": 3449920} +{"current_steps": 7020, "total_steps": 37885, "loss": 0.0844, "lr": 1.9560397633333663e-06, "epoch": 0.9264880559588228, "percentage": 18.53, "elapsed_time": "0:09:53", "remaining_time": "0:43:31", "throughput": 5813.43, "total_tokens": 3452416} +{"current_steps": 7025, "total_steps": 37885, "loss": 0.1329, "lr": 1.955904568120092e-06, "epoch": 0.9271479477365712, "percentage": 18.54, "elapsed_time": "0:09:54", "remaining_time": "0:43:30", "throughput": 5814.42, "total_tokens": 3454912} +{"current_steps": 7030, "total_steps": 37885, "loss": 0.0823, "lr": 1.955769170022996e-06, "epoch": 0.9278078395143197, "percentage": 18.56, "elapsed_time": "0:09:54", "remaining_time": "0:43:29", "throughput": 5815.51, "total_tokens": 3457472} +{"current_steps": 7035, "total_steps": 37885, "loss": 0.0728, "lr": 1.955633569070814e-06, "epoch": 0.928467731292068, "percentage": 18.57, "elapsed_time": "0:09:54", "remaining_time": "0:43:28", "throughput": 5816.1, "total_tokens": 3459712} +{"current_steps": 7040, "total_steps": 37885, "loss": 0.1126, "lr": 1.9554977652923276e-06, "epoch": 0.9291276230698166, "percentage": 18.58, "elapsed_time": "0:09:55", "remaining_time": "0:43:27", "throughput": 5816.97, "total_tokens": 3462144} +{"current_steps": 7045, "total_steps": 37885, "loss": 0.2659, "lr": 1.9553617587163594e-06, "epoch": 0.9297875148475649, "percentage": 18.6, "elapsed_time": "0:09:55", "remaining_time": "0:43:26", "throughput": 5817.78, "total_tokens": 3464512} +{"current_steps": 7050, "total_steps": 37885, "loss": 0.0937, "lr": 1.955225549371776e-06, "epoch": 0.9304474066253134, "percentage": 18.61, "elapsed_time": "0:09:55", "remaining_time": "0:43:26", "throughput": 5818.57, "total_tokens": 3466880} +{"current_steps": 7055, "total_steps": 37885, "loss": 0.1008, "lr": 1.9550891372874872e-06, "epoch": 0.931107298403062, "percentage": 18.62, "elapsed_time": "0:09:56", "remaining_time": "0:43:25", "throughput": 5819.34, "total_tokens": 3469248} +{"current_steps": 7060, "total_steps": 37885, "loss": 0.2362, "lr": 1.9549525224924453e-06, "epoch": 0.9317671901808103, "percentage": 18.64, "elapsed_time": "0:09:56", "remaining_time": "0:43:24", "throughput": 5820.11, "total_tokens": 3471616} +{"current_steps": 7065, "total_steps": 37885, "loss": 0.0761, "lr": 1.9548157050156456e-06, "epoch": 0.9324270819585588, "percentage": 18.65, "elapsed_time": "0:09:56", "remaining_time": "0:43:23", "throughput": 5821.28, "total_tokens": 3474240} +{"current_steps": 7070, "total_steps": 37885, "loss": 0.0566, "lr": 1.9546786848861268e-06, "epoch": 0.9330869737363072, "percentage": 18.66, "elapsed_time": "0:09:57", "remaining_time": "0:43:22", "throughput": 5822.36, "total_tokens": 3476800} +{"current_steps": 7075, "total_steps": 37885, "loss": 0.091, "lr": 1.95454146213297e-06, "epoch": 0.9337468655140557, "percentage": 18.67, "elapsed_time": "0:09:57", "remaining_time": "0:43:21", "throughput": 5823.61, "total_tokens": 3479488} +{"current_steps": 7080, "total_steps": 37885, "loss": 0.0015, "lr": 1.954404036785301e-06, "epoch": 0.9344067572918041, "percentage": 18.69, "elapsed_time": "0:09:57", "remaining_time": "0:43:21", "throughput": 5824.87, "total_tokens": 3482176} +{"current_steps": 7085, "total_steps": 37885, "loss": 0.078, "lr": 1.9542664088722857e-06, "epoch": 0.9350666490695526, "percentage": 18.7, "elapsed_time": "0:09:58", "remaining_time": "0:43:20", "throughput": 5826.06, "total_tokens": 3484800} +{"current_steps": 7090, "total_steps": 37885, "loss": 0.1244, "lr": 1.9541285784231355e-06, "epoch": 0.935726540847301, "percentage": 18.71, "elapsed_time": "0:09:58", "remaining_time": "0:43:19", "throughput": 5827.31, "total_tokens": 3487488} +{"current_steps": 7095, "total_steps": 37885, "loss": 0.3198, "lr": 1.9539905454671037e-06, "epoch": 0.9363864326250495, "percentage": 18.73, "elapsed_time": "0:09:58", "remaining_time": "0:43:18", "throughput": 5827.89, "total_tokens": 3489728} +{"current_steps": 7100, "total_steps": 37885, "loss": 0.2493, "lr": 1.953852310033487e-06, "epoch": 0.9370463244027979, "percentage": 18.74, "elapsed_time": "0:09:59", "remaining_time": "0:43:17", "throughput": 5828.38, "total_tokens": 3491904} +{"current_steps": 7105, "total_steps": 37885, "loss": 0.059, "lr": 1.9537138721516248e-06, "epoch": 0.9377062161805464, "percentage": 18.75, "elapsed_time": "0:09:59", "remaining_time": "0:43:16", "throughput": 5829.64, "total_tokens": 3494592} +{"current_steps": 7110, "total_steps": 37885, "loss": 0.1345, "lr": 1.9535752318508995e-06, "epoch": 0.9383661079582949, "percentage": 18.77, "elapsed_time": "0:09:59", "remaining_time": "0:43:16", "throughput": 5830.61, "total_tokens": 3497088} +{"current_steps": 7115, "total_steps": 37885, "loss": 0.0865, "lr": 1.9534363891607363e-06, "epoch": 0.9390259997360433, "percentage": 18.78, "elapsed_time": "0:10:00", "remaining_time": "0:43:15", "throughput": 5831.48, "total_tokens": 3499520} +{"current_steps": 7120, "total_steps": 37885, "loss": 0.1349, "lr": 1.953297344110604e-06, "epoch": 0.9396858915137918, "percentage": 18.79, "elapsed_time": "0:10:00", "remaining_time": "0:43:14", "throughput": 5832.74, "total_tokens": 3502208} +{"current_steps": 7125, "total_steps": 37885, "loss": 0.1946, "lr": 1.9531580967300135e-06, "epoch": 0.9403457832915402, "percentage": 18.81, "elapsed_time": "0:10:00", "remaining_time": "0:43:13", "throughput": 5833.62, "total_tokens": 3504640} +{"current_steps": 7130, "total_steps": 37885, "loss": 0.0946, "lr": 1.953018647048519e-06, "epoch": 0.9410056750692887, "percentage": 18.82, "elapsed_time": "0:10:01", "remaining_time": "0:43:12", "throughput": 5834.28, "total_tokens": 3506944} +{"current_steps": 7135, "total_steps": 37885, "loss": 0.2065, "lr": 1.9528789950957182e-06, "epoch": 0.9416655668470371, "percentage": 18.83, "elapsed_time": "0:10:01", "remaining_time": "0:43:11", "throughput": 5835.14, "total_tokens": 3509376} +{"current_steps": 7140, "total_steps": 37885, "loss": 0.096, "lr": 1.9527391409012507e-06, "epoch": 0.9423254586247856, "percentage": 18.85, "elapsed_time": "0:10:01", "remaining_time": "0:43:11", "throughput": 5835.82, "total_tokens": 3511680} +{"current_steps": 7145, "total_steps": 37885, "loss": 0.0617, "lr": 1.9525990844948e-06, "epoch": 0.942985350402534, "percentage": 18.86, "elapsed_time": "0:10:02", "remaining_time": "0:43:10", "throughput": 5836.68, "total_tokens": 3514112} +{"current_steps": 7150, "total_steps": 37885, "loss": 0.0848, "lr": 1.952458825906092e-06, "epoch": 0.9436452421802825, "percentage": 18.87, "elapsed_time": "0:10:02", "remaining_time": "0:43:09", "throughput": 5837.46, "total_tokens": 3516480} +{"current_steps": 7155, "total_steps": 37885, "loss": 0.1279, "lr": 1.952318365164895e-06, "epoch": 0.9443051339580308, "percentage": 18.89, "elapsed_time": "0:10:02", "remaining_time": "0:43:08", "throughput": 5838.02, "total_tokens": 3518720} +{"current_steps": 7160, "total_steps": 37885, "loss": 0.0011, "lr": 1.952177702301021e-06, "epoch": 0.9449650257357793, "percentage": 18.9, "elapsed_time": "0:10:03", "remaining_time": "0:43:07", "throughput": 5838.98, "total_tokens": 3521216} +{"current_steps": 7165, "total_steps": 37885, "loss": 0.2427, "lr": 1.9520368373443246e-06, "epoch": 0.9456249175135277, "percentage": 18.91, "elapsed_time": "0:10:03", "remaining_time": "0:43:07", "throughput": 5840.02, "total_tokens": 3523776} +{"current_steps": 7170, "total_steps": 37885, "loss": 0.0515, "lr": 1.951895770324704e-06, "epoch": 0.9462848092912762, "percentage": 18.93, "elapsed_time": "0:10:03", "remaining_time": "0:43:06", "throughput": 5841.01, "total_tokens": 3526272} +{"current_steps": 7175, "total_steps": 37885, "loss": 0.1211, "lr": 1.9517545012720993e-06, "epoch": 0.9469447010690247, "percentage": 18.94, "elapsed_time": "0:10:04", "remaining_time": "0:43:05", "throughput": 5841.68, "total_tokens": 3528576} +{"current_steps": 7180, "total_steps": 37885, "loss": 0.0018, "lr": 1.9516130302164937e-06, "epoch": 0.9476045928467731, "percentage": 18.95, "elapsed_time": "0:10:04", "remaining_time": "0:43:04", "throughput": 5842.69, "total_tokens": 3531136} +{"current_steps": 7185, "total_steps": 37885, "loss": 0.2951, "lr": 1.9514713571879135e-06, "epoch": 0.9482644846245216, "percentage": 18.97, "elapsed_time": "0:10:04", "remaining_time": "0:43:03", "throughput": 5843.73, "total_tokens": 3533696} +{"current_steps": 7190, "total_steps": 37885, "loss": 0.0036, "lr": 1.9513294822164274e-06, "epoch": 0.94892437640227, "percentage": 18.98, "elapsed_time": "0:10:05", "remaining_time": "0:43:02", "throughput": 5844.48, "total_tokens": 3536064} +{"current_steps": 7195, "total_steps": 37885, "loss": 0.0063, "lr": 1.9511874053321483e-06, "epoch": 0.9495842681800185, "percentage": 18.99, "elapsed_time": "0:10:05", "remaining_time": "0:43:02", "throughput": 5845.23, "total_tokens": 3538432} +{"current_steps": 7200, "total_steps": 37885, "loss": 0.0012, "lr": 1.95104512656523e-06, "epoch": 0.9502441599577669, "percentage": 19.0, "elapsed_time": "0:10:05", "remaining_time": "0:43:01", "throughput": 5846.46, "total_tokens": 3541120} +{"current_steps": 7205, "total_steps": 37885, "loss": 0.0973, "lr": 1.9509026459458702e-06, "epoch": 0.9509040517355154, "percentage": 19.02, "elapsed_time": "0:10:06", "remaining_time": "0:43:00", "throughput": 5847.52, "total_tokens": 3543680} +{"current_steps": 7210, "total_steps": 37885, "loss": 0.2608, "lr": 1.95075996350431e-06, "epoch": 0.9515639435132638, "percentage": 19.03, "elapsed_time": "0:10:06", "remaining_time": "0:42:59", "throughput": 5848.16, "total_tokens": 3545984} +{"current_steps": 7215, "total_steps": 37885, "loss": 0.0943, "lr": 1.9506170792708327e-06, "epoch": 0.9522238352910123, "percentage": 19.04, "elapsed_time": "0:10:06", "remaining_time": "0:42:58", "throughput": 5849.19, "total_tokens": 3548544} +{"current_steps": 7220, "total_steps": 37885, "loss": 0.0524, "lr": 1.950473993275764e-06, "epoch": 0.9528837270687607, "percentage": 19.06, "elapsed_time": "0:10:07", "remaining_time": "0:42:58", "throughput": 5850.14, "total_tokens": 3551040} +{"current_steps": 7225, "total_steps": 37885, "loss": 0.1268, "lr": 1.950330705549473e-06, "epoch": 0.9535436188465092, "percentage": 19.07, "elapsed_time": "0:10:07", "remaining_time": "0:42:57", "throughput": 5851.09, "total_tokens": 3553536} +{"current_steps": 7230, "total_steps": 37885, "loss": 0.1104, "lr": 1.950187216122371e-06, "epoch": 0.9542035106242576, "percentage": 19.08, "elapsed_time": "0:10:07", "remaining_time": "0:42:56", "throughput": 5851.55, "total_tokens": 3555712} +{"current_steps": 7235, "total_steps": 37885, "loss": 0.1443, "lr": 1.9500435250249136e-06, "epoch": 0.9548634024020061, "percentage": 19.1, "elapsed_time": "0:10:07", "remaining_time": "0:42:55", "throughput": 5852.3, "total_tokens": 3558080} +{"current_steps": 7240, "total_steps": 37885, "loss": 0.1269, "lr": 1.949899632287598e-06, "epoch": 0.9555232941797546, "percentage": 19.11, "elapsed_time": "0:10:08", "remaining_time": "0:42:54", "throughput": 5853.36, "total_tokens": 3560640} +{"current_steps": 7245, "total_steps": 37885, "loss": 0.0389, "lr": 1.9497555379409633e-06, "epoch": 0.956183185957503, "percentage": 19.12, "elapsed_time": "0:10:08", "remaining_time": "0:42:54", "throughput": 5854.67, "total_tokens": 3563392} +{"current_steps": 7250, "total_steps": 37885, "loss": 0.309, "lr": 1.9496112420155937e-06, "epoch": 0.9568430777352515, "percentage": 19.14, "elapsed_time": "0:10:08", "remaining_time": "0:42:53", "throughput": 5855.48, "total_tokens": 3565824} +{"current_steps": 7255, "total_steps": 37885, "loss": 0.0023, "lr": 1.949466744542115e-06, "epoch": 0.9575029695129998, "percentage": 19.15, "elapsed_time": "0:10:09", "remaining_time": "0:42:52", "throughput": 5856.32, "total_tokens": 3568256} +{"current_steps": 7260, "total_steps": 37885, "loss": 0.0659, "lr": 1.9493220455511943e-06, "epoch": 0.9581628612907483, "percentage": 19.16, "elapsed_time": "0:10:09", "remaining_time": "0:42:51", "throughput": 5857.29, "total_tokens": 3570752} +{"current_steps": 7265, "total_steps": 37885, "loss": 0.1136, "lr": 1.9491771450735444e-06, "epoch": 0.9588227530684967, "percentage": 19.18, "elapsed_time": "0:10:09", "remaining_time": "0:42:50", "throughput": 5857.72, "total_tokens": 3572928} +{"current_steps": 7270, "total_steps": 37885, "loss": 0.2459, "lr": 1.9490320431399186e-06, "epoch": 0.9594826448462452, "percentage": 19.19, "elapsed_time": "0:10:10", "remaining_time": "0:42:49", "throughput": 5858.45, "total_tokens": 3575296} +{"current_steps": 7275, "total_steps": 37885, "loss": 0.0014, "lr": 1.9488867397811143e-06, "epoch": 0.9601425366239936, "percentage": 19.2, "elapsed_time": "0:10:10", "remaining_time": "0:42:49", "throughput": 5859.17, "total_tokens": 3577664} +{"current_steps": 7280, "total_steps": 37885, "loss": 0.1765, "lr": 1.948741235027971e-06, "epoch": 0.9608024284017421, "percentage": 19.22, "elapsed_time": "0:10:10", "remaining_time": "0:42:48", "throughput": 5860.08, "total_tokens": 3580160} +{"current_steps": 7285, "total_steps": 37885, "loss": 0.1181, "lr": 1.9485955289113703e-06, "epoch": 0.9614623201794905, "percentage": 19.23, "elapsed_time": "0:10:11", "remaining_time": "0:42:47", "throughput": 5860.72, "total_tokens": 3582464} +{"current_steps": 7290, "total_steps": 37885, "loss": 0.1001, "lr": 1.9484496214622375e-06, "epoch": 0.962122211957239, "percentage": 19.24, "elapsed_time": "0:10:11", "remaining_time": "0:42:46", "throughput": 5861.58, "total_tokens": 3584896} +{"current_steps": 7295, "total_steps": 37885, "loss": 0.0035, "lr": 1.9483035127115416e-06, "epoch": 0.9627821037349875, "percentage": 19.26, "elapsed_time": "0:10:11", "remaining_time": "0:42:45", "throughput": 5862.77, "total_tokens": 3587584} +{"current_steps": 7300, "total_steps": 37885, "loss": 0.149, "lr": 1.948157202690292e-06, "epoch": 0.9634419955127359, "percentage": 19.27, "elapsed_time": "0:10:12", "remaining_time": "0:42:45", "throughput": 5863.81, "total_tokens": 3590144} +{"current_steps": 7305, "total_steps": 37885, "loss": 0.0409, "lr": 1.9480106914295416e-06, "epoch": 0.9641018872904844, "percentage": 19.28, "elapsed_time": "0:10:12", "remaining_time": "0:42:44", "throughput": 5865.02, "total_tokens": 3592832} +{"current_steps": 7310, "total_steps": 37885, "loss": 0.1264, "lr": 1.947863978960387e-06, "epoch": 0.9647617790682328, "percentage": 19.3, "elapsed_time": "0:10:12", "remaining_time": "0:42:43", "throughput": 5866.1, "total_tokens": 3595456} +{"current_steps": 7315, "total_steps": 37885, "loss": 0.1478, "lr": 1.947717065313967e-06, "epoch": 0.9654216708459813, "percentage": 19.31, "elapsed_time": "0:10:13", "remaining_time": "0:42:42", "throughput": 5866.95, "total_tokens": 3597888} +{"current_steps": 7320, "total_steps": 37885, "loss": 0.0841, "lr": 1.9475699505214625e-06, "epoch": 0.9660815626237297, "percentage": 19.32, "elapsed_time": "0:10:13", "remaining_time": "0:42:42", "throughput": 5867.86, "total_tokens": 3600384} +{"current_steps": 7325, "total_steps": 37885, "loss": 0.183, "lr": 1.947422634614098e-06, "epoch": 0.9667414544014782, "percentage": 19.33, "elapsed_time": "0:10:13", "remaining_time": "0:42:41", "throughput": 5868.77, "total_tokens": 3602880} +{"current_steps": 7330, "total_steps": 37885, "loss": 0.0148, "lr": 1.94727511762314e-06, "epoch": 0.9674013461792266, "percentage": 19.35, "elapsed_time": "0:10:14", "remaining_time": "0:42:40", "throughput": 5869.5, "total_tokens": 3605248} +{"current_steps": 7335, "total_steps": 37885, "loss": 0.0009, "lr": 1.9471273995798977e-06, "epoch": 0.9680612379569751, "percentage": 19.36, "elapsed_time": "0:10:14", "remaining_time": "0:42:39", "throughput": 5870.53, "total_tokens": 3607808} +{"current_steps": 7340, "total_steps": 37885, "loss": 0.2154, "lr": 1.9469794805157235e-06, "epoch": 0.9687211297347235, "percentage": 19.37, "elapsed_time": "0:10:14", "remaining_time": "0:42:38", "throughput": 5871.16, "total_tokens": 3610112} +{"current_steps": 7345, "total_steps": 37885, "loss": 0.1076, "lr": 1.946831360462012e-06, "epoch": 0.969381021512472, "percentage": 19.39, "elapsed_time": "0:10:15", "remaining_time": "0:42:38", "throughput": 5871.69, "total_tokens": 3612352} +{"current_steps": 7350, "total_steps": 37885, "loss": 0.184, "lr": 1.946683039450201e-06, "epoch": 0.9700409132902204, "percentage": 19.4, "elapsed_time": "0:10:15", "remaining_time": "0:42:37", "throughput": 5872.59, "total_tokens": 3614848} +{"current_steps": 7355, "total_steps": 37885, "loss": 0.0021, "lr": 1.9465345175117698e-06, "epoch": 0.9707008050679689, "percentage": 19.41, "elapsed_time": "0:10:15", "remaining_time": "0:42:36", "throughput": 5873.61, "total_tokens": 3617408} +{"current_steps": 7360, "total_steps": 37885, "loss": 0.1402, "lr": 1.9463857946782418e-06, "epoch": 0.9713606968457174, "percentage": 19.43, "elapsed_time": "0:10:16", "remaining_time": "0:42:35", "throughput": 5874.62, "total_tokens": 3619968} +{"current_steps": 7365, "total_steps": 37885, "loss": 0.1302, "lr": 1.9462368709811816e-06, "epoch": 0.9720205886234657, "percentage": 19.44, "elapsed_time": "0:10:16", "remaining_time": "0:42:34", "throughput": 5874.86, "total_tokens": 3622016} +{"current_steps": 7370, "total_steps": 37885, "loss": 0.115, "lr": 1.946087746452198e-06, "epoch": 0.9726804804012142, "percentage": 19.45, "elapsed_time": "0:10:16", "remaining_time": "0:42:34", "throughput": 5875.29, "total_tokens": 3624192} +{"current_steps": 7375, "total_steps": 37885, "loss": 0.0111, "lr": 1.945938421122941e-06, "epoch": 0.9733403721789626, "percentage": 19.47, "elapsed_time": "0:10:17", "remaining_time": "0:42:33", "throughput": 5876.1, "total_tokens": 3626624} +{"current_steps": 7380, "total_steps": 37885, "loss": 0.1937, "lr": 1.9457888950251045e-06, "epoch": 0.9740002639567111, "percentage": 19.48, "elapsed_time": "0:10:17", "remaining_time": "0:42:32", "throughput": 5876.71, "total_tokens": 3628928} +{"current_steps": 7385, "total_steps": 37885, "loss": 0.1085, "lr": 1.9456391681904234e-06, "epoch": 0.9746601557344595, "percentage": 19.49, "elapsed_time": "0:10:17", "remaining_time": "0:42:31", "throughput": 5877.81, "total_tokens": 3631552} +{"current_steps": 7390, "total_steps": 37885, "loss": 0.078, "lr": 1.9454892406506774e-06, "epoch": 0.975320047512208, "percentage": 19.51, "elapsed_time": "0:10:18", "remaining_time": "0:42:30", "throughput": 5878.63, "total_tokens": 3633984} +{"current_steps": 7395, "total_steps": 37885, "loss": 0.2813, "lr": 1.945339112437686e-06, "epoch": 0.9759799392899564, "percentage": 19.52, "elapsed_time": "0:10:18", "remaining_time": "0:42:30", "throughput": 5879.17, "total_tokens": 3636224} +{"current_steps": 7400, "total_steps": 37885, "loss": 0.1583, "lr": 1.945188783583314e-06, "epoch": 0.9766398310677049, "percentage": 19.53, "elapsed_time": "0:10:18", "remaining_time": "0:42:29", "throughput": 5879.98, "total_tokens": 3638656} +{"current_steps": 7405, "total_steps": 37885, "loss": 0.0369, "lr": 1.945038254119467e-06, "epoch": 0.9772997228454533, "percentage": 19.55, "elapsed_time": "0:10:19", "remaining_time": "0:42:28", "throughput": 5881.25, "total_tokens": 3641408} +{"current_steps": 7410, "total_steps": 37885, "loss": 0.133, "lr": 1.944887524078094e-06, "epoch": 0.9779596146232018, "percentage": 19.56, "elapsed_time": "0:10:19", "remaining_time": "0:42:27", "throughput": 5882.05, "total_tokens": 3643840} +{"current_steps": 7415, "total_steps": 37885, "loss": 0.058, "lr": 1.9447365934911862e-06, "epoch": 0.9786195064009502, "percentage": 19.57, "elapsed_time": "0:10:19", "remaining_time": "0:42:26", "throughput": 5882.97, "total_tokens": 3646336} +{"current_steps": 7420, "total_steps": 37885, "loss": 0.086, "lr": 1.944585462390778e-06, "epoch": 0.9792793981786987, "percentage": 19.59, "elapsed_time": "0:10:20", "remaining_time": "0:42:26", "throughput": 5884.07, "total_tokens": 3648960} +{"current_steps": 7425, "total_steps": 37885, "loss": 0.0681, "lr": 1.9444341308089456e-06, "epoch": 0.9799392899564472, "percentage": 19.6, "elapsed_time": "0:10:20", "remaining_time": "0:42:25", "throughput": 5884.54, "total_tokens": 3651200} +{"current_steps": 7430, "total_steps": 37885, "loss": 0.0279, "lr": 1.944282598777808e-06, "epoch": 0.9805991817341956, "percentage": 19.61, "elapsed_time": "0:10:20", "remaining_time": "0:42:24", "throughput": 5885.14, "total_tokens": 3653504} +{"current_steps": 7435, "total_steps": 37885, "loss": 0.3589, "lr": 1.9441308663295264e-06, "epoch": 0.9812590735119441, "percentage": 19.63, "elapsed_time": "0:10:21", "remaining_time": "0:42:23", "throughput": 5886.13, "total_tokens": 3656064} +{"current_steps": 7440, "total_steps": 37885, "loss": 0.4366, "lr": 1.9439789334963055e-06, "epoch": 0.9819189652896925, "percentage": 19.64, "elapsed_time": "0:10:21", "remaining_time": "0:42:23", "throughput": 5886.35, "total_tokens": 3658112} +{"current_steps": 7445, "total_steps": 37885, "loss": 0.1661, "lr": 1.9438268003103916e-06, "epoch": 0.982578857067441, "percentage": 19.65, "elapsed_time": "0:10:21", "remaining_time": "0:42:22", "throughput": 5887.71, "total_tokens": 3660928} +{"current_steps": 7450, "total_steps": 37885, "loss": 0.1425, "lr": 1.943674466804074e-06, "epoch": 0.9832387488451894, "percentage": 19.66, "elapsed_time": "0:10:22", "remaining_time": "0:42:21", "throughput": 5888.31, "total_tokens": 3663232} +{"current_steps": 7455, "total_steps": 37885, "loss": 0.0064, "lr": 1.9435219330096845e-06, "epoch": 0.9838986406229379, "percentage": 19.68, "elapsed_time": "0:10:22", "remaining_time": "0:42:20", "throughput": 5889.0, "total_tokens": 3665600} +{"current_steps": 7460, "total_steps": 37885, "loss": 0.0393, "lr": 1.9433691989595975e-06, "epoch": 0.9845585324006862, "percentage": 19.69, "elapsed_time": "0:10:22", "remaining_time": "0:42:19", "throughput": 5889.88, "total_tokens": 3668096} +{"current_steps": 7465, "total_steps": 37885, "loss": 0.0312, "lr": 1.943216264686229e-06, "epoch": 0.9852184241784347, "percentage": 19.7, "elapsed_time": "0:10:23", "remaining_time": "0:42:19", "throughput": 5890.89, "total_tokens": 3670656} +{"current_steps": 7470, "total_steps": 37885, "loss": 0.157, "lr": 1.943063130222038e-06, "epoch": 0.9858783159561831, "percentage": 19.72, "elapsed_time": "0:10:23", "remaining_time": "0:42:18", "throughput": 5891.55, "total_tokens": 3673024} +{"current_steps": 7475, "total_steps": 37885, "loss": 0.0569, "lr": 1.9429097955995275e-06, "epoch": 0.9865382077339316, "percentage": 19.73, "elapsed_time": "0:10:23", "remaining_time": "0:42:17", "throughput": 5892.7, "total_tokens": 3675712} +{"current_steps": 7480, "total_steps": 37885, "loss": 0.1377, "lr": 1.9427562608512406e-06, "epoch": 0.9871980995116801, "percentage": 19.74, "elapsed_time": "0:10:24", "remaining_time": "0:42:16", "throughput": 5893.42, "total_tokens": 3678080} +{"current_steps": 7485, "total_steps": 37885, "loss": 0.2043, "lr": 1.9426025260097645e-06, "epoch": 0.9878579912894285, "percentage": 19.76, "elapsed_time": "0:10:24", "remaining_time": "0:42:16", "throughput": 5894.12, "total_tokens": 3680448} +{"current_steps": 7490, "total_steps": 37885, "loss": 0.0405, "lr": 1.9424485911077278e-06, "epoch": 0.988517883067177, "percentage": 19.77, "elapsed_time": "0:10:24", "remaining_time": "0:42:15", "throughput": 5894.74, "total_tokens": 3682752} +{"current_steps": 7495, "total_steps": 37885, "loss": 0.1633, "lr": 1.9422944561778026e-06, "epoch": 0.9891777748449254, "percentage": 19.78, "elapsed_time": "0:10:25", "remaining_time": "0:42:14", "throughput": 5895.77, "total_tokens": 3685376} +{"current_steps": 7500, "total_steps": 37885, "loss": 0.0496, "lr": 1.9421401212527023e-06, "epoch": 0.9898376666226739, "percentage": 19.8, "elapsed_time": "0:10:25", "remaining_time": "0:42:13", "throughput": 5896.47, "total_tokens": 3687744} +{"current_steps": 7505, "total_steps": 37885, "loss": 0.1828, "lr": 1.9419855863651837e-06, "epoch": 0.9904975584004223, "percentage": 19.81, "elapsed_time": "0:10:25", "remaining_time": "0:42:12", "throughput": 5897.35, "total_tokens": 3690240} +{"current_steps": 7510, "total_steps": 37885, "loss": 0.1345, "lr": 1.941830851548046e-06, "epoch": 0.9911574501781708, "percentage": 19.82, "elapsed_time": "0:10:26", "remaining_time": "0:42:12", "throughput": 5898.22, "total_tokens": 3692736} +{"current_steps": 7515, "total_steps": 37885, "loss": 0.1963, "lr": 1.94167591683413e-06, "epoch": 0.9918173419559192, "percentage": 19.84, "elapsed_time": "0:10:26", "remaining_time": "0:42:11", "throughput": 5899.31, "total_tokens": 3695360} +{"current_steps": 7520, "total_steps": 37885, "loss": 0.0683, "lr": 1.94152078225632e-06, "epoch": 0.9924772337336677, "percentage": 19.85, "elapsed_time": "0:10:26", "remaining_time": "0:42:10", "throughput": 5900.18, "total_tokens": 3697856} +{"current_steps": 7525, "total_steps": 37885, "loss": 0.0825, "lr": 1.9413654478475415e-06, "epoch": 0.9931371255114161, "percentage": 19.86, "elapsed_time": "0:10:27", "remaining_time": "0:42:09", "throughput": 5900.87, "total_tokens": 3700224} +{"current_steps": 7530, "total_steps": 37885, "loss": 0.1488, "lr": 1.941209913640764e-06, "epoch": 0.9937970172891646, "percentage": 19.88, "elapsed_time": "0:10:27", "remaining_time": "0:42:09", "throughput": 5901.57, "total_tokens": 3702592} +{"current_steps": 7535, "total_steps": 37885, "loss": 0.1055, "lr": 1.9410541796689975e-06, "epoch": 0.994456909066913, "percentage": 19.89, "elapsed_time": "0:10:27", "remaining_time": "0:42:08", "throughput": 5902.19, "total_tokens": 3704896} +{"current_steps": 7540, "total_steps": 37885, "loss": 0.0121, "lr": 1.9408982459652963e-06, "epoch": 0.9951168008446615, "percentage": 19.9, "elapsed_time": "0:10:28", "remaining_time": "0:42:07", "throughput": 5902.89, "total_tokens": 3707264} +{"current_steps": 7545, "total_steps": 37885, "loss": 0.0858, "lr": 1.940742112562756e-06, "epoch": 0.99577669262241, "percentage": 19.92, "elapsed_time": "0:10:28", "remaining_time": "0:42:06", "throughput": 5903.84, "total_tokens": 3709824} +{"current_steps": 7550, "total_steps": 37885, "loss": 0.1293, "lr": 1.9405857794945142e-06, "epoch": 0.9964365844001584, "percentage": 19.93, "elapsed_time": "0:10:28", "remaining_time": "0:42:06", "throughput": 5904.53, "total_tokens": 3712192} +{"current_steps": 7555, "total_steps": 37885, "loss": 0.0699, "lr": 1.9404292467937525e-06, "epoch": 0.9970964761779069, "percentage": 19.94, "elapsed_time": "0:10:29", "remaining_time": "0:42:05", "throughput": 5905.67, "total_tokens": 3714880} +{"current_steps": 7560, "total_steps": 37885, "loss": 0.0584, "lr": 1.9402725144936926e-06, "epoch": 0.9977563679556553, "percentage": 19.96, "elapsed_time": "0:10:29", "remaining_time": "0:42:04", "throughput": 5906.25, "total_tokens": 3717184} +{"current_steps": 7565, "total_steps": 37885, "loss": 0.0402, "lr": 1.940115582627601e-06, "epoch": 0.9984162597334038, "percentage": 19.97, "elapsed_time": "0:10:29", "remaining_time": "0:42:03", "throughput": 5906.76, "total_tokens": 3719424} +{"current_steps": 7570, "total_steps": 37885, "loss": 0.0668, "lr": 1.9399584512287842e-06, "epoch": 0.9990761515111521, "percentage": 19.98, "elapsed_time": "0:10:30", "remaining_time": "0:42:02", "throughput": 5907.64, "total_tokens": 3721920} +{"current_steps": 7575, "total_steps": 37885, "loss": 0.123, "lr": 1.939801120330593e-06, "epoch": 0.9997360432889006, "percentage": 19.99, "elapsed_time": "0:10:30", "remaining_time": "0:42:02", "throughput": 5908.35, "total_tokens": 3724288} +{"current_steps": 7580, "total_steps": 37885, "loss": 0.0006, "lr": 1.9396435899664198e-06, "epoch": 1.0003959350666491, "percentage": 20.01, "elapsed_time": "0:10:30", "remaining_time": "0:42:01", "throughput": 5908.03, "total_tokens": 3726464} +{"current_steps": 7580, "total_steps": 37885, "eval_loss": 0.11427787691354752, "epoch": 1.0003959350666491, "percentage": 20.01, "elapsed_time": "0:10:38", "remaining_time": "0:42:32", "throughput": 5835.73, "total_tokens": 3726464} +{"current_steps": 7585, "total_steps": 37885, "loss": 0.0614, "lr": 1.9394858601696986e-06, "epoch": 1.0010558268443974, "percentage": 20.02, "elapsed_time": "0:11:14", "remaining_time": "0:44:55", "throughput": 5525.45, "total_tokens": 3728960} +{"current_steps": 7590, "total_steps": 37885, "loss": 0.0011, "lr": 1.9393279309739067e-06, "epoch": 1.001715718622146, "percentage": 20.03, "elapsed_time": "0:11:15", "remaining_time": "0:44:55", "throughput": 5526.63, "total_tokens": 3731648} +{"current_steps": 7595, "total_steps": 37885, "loss": 0.0326, "lr": 1.939169802412564e-06, "epoch": 1.0023756103998944, "percentage": 20.05, "elapsed_time": "0:11:15", "remaining_time": "0:44:54", "throughput": 5527.54, "total_tokens": 3734144} +{"current_steps": 7600, "total_steps": 37885, "loss": 0.1584, "lr": 1.939011474519231e-06, "epoch": 1.003035502177643, "percentage": 20.06, "elapsed_time": "0:11:15", "remaining_time": "0:44:53", "throughput": 5528.55, "total_tokens": 3736704} +{"current_steps": 7605, "total_steps": 37885, "loss": 0.1347, "lr": 1.938852947327513e-06, "epoch": 1.0036953939553914, "percentage": 20.07, "elapsed_time": "0:11:16", "remaining_time": "0:44:52", "throughput": 5529.65, "total_tokens": 3739328} +{"current_steps": 7610, "total_steps": 37885, "loss": 0.1089, "lr": 1.938694220871055e-06, "epoch": 1.0043552857331397, "percentage": 20.09, "elapsed_time": "0:11:16", "remaining_time": "0:44:51", "throughput": 5530.51, "total_tokens": 3741760} +{"current_steps": 7615, "total_steps": 37885, "loss": 0.0551, "lr": 1.938535295183547e-06, "epoch": 1.0050151775108882, "percentage": 20.1, "elapsed_time": "0:11:16", "remaining_time": "0:44:50", "throughput": 5531.64, "total_tokens": 3744384} +{"current_steps": 7620, "total_steps": 37885, "loss": 0.1567, "lr": 1.938376170298718e-06, "epoch": 1.0056750692886367, "percentage": 20.11, "elapsed_time": "0:11:17", "remaining_time": "0:44:49", "throughput": 5532.46, "total_tokens": 3746816} +{"current_steps": 7625, "total_steps": 37885, "loss": 0.0361, "lr": 1.9382168462503425e-06, "epoch": 1.0063349610663852, "percentage": 20.13, "elapsed_time": "0:11:17", "remaining_time": "0:44:48", "throughput": 5533.66, "total_tokens": 3749504} +{"current_steps": 7630, "total_steps": 37885, "loss": 0.052, "lr": 1.9380573230722354e-06, "epoch": 1.0069948528441335, "percentage": 20.14, "elapsed_time": "0:11:17", "remaining_time": "0:44:48", "throughput": 5534.51, "total_tokens": 3751936} +{"current_steps": 7635, "total_steps": 37885, "loss": 0.0017, "lr": 1.9378976007982543e-06, "epoch": 1.007654744621882, "percentage": 20.15, "elapsed_time": "0:11:18", "remaining_time": "0:44:47", "throughput": 5535.36, "total_tokens": 3754368} +{"current_steps": 7640, "total_steps": 37885, "loss": 0.0029, "lr": 1.9377376794622992e-06, "epoch": 1.0083146363996305, "percentage": 20.17, "elapsed_time": "0:11:18", "remaining_time": "0:44:46", "throughput": 5536.1, "total_tokens": 3756736} +{"current_steps": 7645, "total_steps": 37885, "loss": 0.0945, "lr": 1.937577559098312e-06, "epoch": 1.008974528177379, "percentage": 20.18, "elapsed_time": "0:11:18", "remaining_time": "0:44:45", "throughput": 5537.25, "total_tokens": 3759360} +{"current_steps": 7650, "total_steps": 37885, "loss": 0.0614, "lr": 1.9374172397402774e-06, "epoch": 1.0096344199551273, "percentage": 20.19, "elapsed_time": "0:11:19", "remaining_time": "0:44:44", "throughput": 5537.78, "total_tokens": 3761536} +{"current_steps": 7655, "total_steps": 37885, "loss": 0.1333, "lr": 1.937256721422222e-06, "epoch": 1.0102943117328758, "percentage": 20.21, "elapsed_time": "0:11:19", "remaining_time": "0:44:43", "throughput": 5538.68, "total_tokens": 3763968} +{"current_steps": 7660, "total_steps": 37885, "loss": 0.1473, "lr": 1.9370960041782144e-06, "epoch": 1.0109542035106243, "percentage": 20.22, "elapsed_time": "0:11:19", "remaining_time": "0:44:42", "throughput": 5539.49, "total_tokens": 3766336} +{"current_steps": 7665, "total_steps": 37885, "loss": 0.1232, "lr": 1.936935088042366e-06, "epoch": 1.0116140952883728, "percentage": 20.23, "elapsed_time": "0:11:20", "remaining_time": "0:44:41", "throughput": 5540.46, "total_tokens": 3768832} +{"current_steps": 7670, "total_steps": 37885, "loss": 0.066, "lr": 1.9367739730488295e-06, "epoch": 1.0122739870661213, "percentage": 20.25, "elapsed_time": "0:11:20", "remaining_time": "0:44:40", "throughput": 5541.38, "total_tokens": 3771264} +{"current_steps": 7675, "total_steps": 37885, "loss": 0.0628, "lr": 1.9366126592318012e-06, "epoch": 1.0129338788438695, "percentage": 20.26, "elapsed_time": "0:11:20", "remaining_time": "0:44:40", "throughput": 5542.36, "total_tokens": 3773760} +{"current_steps": 7680, "total_steps": 37885, "loss": 0.069, "lr": 1.936451146625518e-06, "epoch": 1.013593770621618, "percentage": 20.27, "elapsed_time": "0:11:21", "remaining_time": "0:44:39", "throughput": 5543.07, "total_tokens": 3776064} +{"current_steps": 7685, "total_steps": 37885, "loss": 0.0009, "lr": 1.9362894352642606e-06, "epoch": 1.0142536623993665, "percentage": 20.29, "elapsed_time": "0:11:21", "remaining_time": "0:44:38", "throughput": 5543.97, "total_tokens": 3778496} +{"current_steps": 7690, "total_steps": 37885, "loss": 0.0102, "lr": 1.9361275251823507e-06, "epoch": 1.014913554177115, "percentage": 20.3, "elapsed_time": "0:11:21", "remaining_time": "0:44:37", "throughput": 5544.87, "total_tokens": 3780928} +{"current_steps": 7695, "total_steps": 37885, "loss": 0.1457, "lr": 1.935965416414152e-06, "epoch": 1.0155734459548633, "percentage": 20.31, "elapsed_time": "0:11:22", "remaining_time": "0:44:36", "throughput": 5545.74, "total_tokens": 3783360} +{"current_steps": 7700, "total_steps": 37885, "loss": 0.0004, "lr": 1.935803108994072e-06, "epoch": 1.0162333377326118, "percentage": 20.32, "elapsed_time": "0:11:22", "remaining_time": "0:44:35", "throughput": 5546.46, "total_tokens": 3785664} +{"current_steps": 7705, "total_steps": 37885, "loss": 0.0002, "lr": 1.9356406029565584e-06, "epoch": 1.0168932295103603, "percentage": 20.34, "elapsed_time": "0:11:22", "remaining_time": "0:44:34", "throughput": 5547.6, "total_tokens": 3788288} +{"current_steps": 7710, "total_steps": 37885, "loss": 0.2777, "lr": 1.935477898336102e-06, "epoch": 1.0175531212881088, "percentage": 20.35, "elapsed_time": "0:11:23", "remaining_time": "0:44:33", "throughput": 5548.6, "total_tokens": 3790784} +{"current_steps": 7715, "total_steps": 37885, "loss": 0.0015, "lr": 1.935314995167236e-06, "epoch": 1.018213013065857, "percentage": 20.36, "elapsed_time": "0:11:23", "remaining_time": "0:44:32", "throughput": 5549.43, "total_tokens": 3793152} +{"current_steps": 7720, "total_steps": 37885, "loss": 0.0003, "lr": 1.9351518934845355e-06, "epoch": 1.0188729048436056, "percentage": 20.38, "elapsed_time": "0:11:23", "remaining_time": "0:44:32", "throughput": 5550.48, "total_tokens": 3795712} +{"current_steps": 7725, "total_steps": 37885, "loss": 0.0478, "lr": 1.934988593322617e-06, "epoch": 1.019532796621354, "percentage": 20.39, "elapsed_time": "0:11:24", "remaining_time": "0:44:31", "throughput": 5551.3, "total_tokens": 3798080} +{"current_steps": 7730, "total_steps": 37885, "loss": 0.1037, "lr": 1.934825094716141e-06, "epoch": 1.0201926883991026, "percentage": 20.4, "elapsed_time": "0:11:24", "remaining_time": "0:44:30", "throughput": 5552.35, "total_tokens": 3800640} +{"current_steps": 7735, "total_steps": 37885, "loss": 0.1827, "lr": 1.9346613976998075e-06, "epoch": 1.020852580176851, "percentage": 20.42, "elapsed_time": "0:11:24", "remaining_time": "0:44:29", "throughput": 5553.09, "total_tokens": 3802944} +{"current_steps": 7740, "total_steps": 37885, "loss": 0.0798, "lr": 1.9344975023083606e-06, "epoch": 1.0215124719545994, "percentage": 20.43, "elapsed_time": "0:11:25", "remaining_time": "0:44:28", "throughput": 5554.15, "total_tokens": 3805504} +{"current_steps": 7745, "total_steps": 37885, "loss": 0.1328, "lr": 1.9343334085765862e-06, "epoch": 1.0221723637323479, "percentage": 20.44, "elapsed_time": "0:11:25", "remaining_time": "0:44:27", "throughput": 5554.78, "total_tokens": 3807744} +{"current_steps": 7750, "total_steps": 37885, "loss": 0.028, "lr": 1.9341691165393116e-06, "epoch": 1.0228322555100964, "percentage": 20.46, "elapsed_time": "0:11:25", "remaining_time": "0:44:26", "throughput": 5555.54, "total_tokens": 3810112} +{"current_steps": 7755, "total_steps": 37885, "loss": 0.1645, "lr": 1.9340046262314065e-06, "epoch": 1.0234921472878449, "percentage": 20.47, "elapsed_time": "0:11:26", "remaining_time": "0:44:25", "throughput": 5556.51, "total_tokens": 3812608} +{"current_steps": 7760, "total_steps": 37885, "loss": 0.0013, "lr": 1.9338399376877835e-06, "epoch": 1.0241520390655932, "percentage": 20.48, "elapsed_time": "0:11:26", "remaining_time": "0:44:24", "throughput": 5557.2, "total_tokens": 3814912} +{"current_steps": 7765, "total_steps": 37885, "loss": 0.0006, "lr": 1.9336750509433958e-06, "epoch": 1.0248119308433417, "percentage": 20.5, "elapsed_time": "0:11:26", "remaining_time": "0:44:24", "throughput": 5558.44, "total_tokens": 3817600} +{"current_steps": 7770, "total_steps": 37885, "loss": 0.1336, "lr": 1.93350996603324e-06, "epoch": 1.0254718226210902, "percentage": 20.51, "elapsed_time": "0:11:27", "remaining_time": "0:44:23", "throughput": 5559.14, "total_tokens": 3819904} +{"current_steps": 7775, "total_steps": 37885, "loss": 0.0452, "lr": 1.933344682992353e-06, "epoch": 1.0261317143988387, "percentage": 20.52, "elapsed_time": "0:11:27", "remaining_time": "0:44:22", "throughput": 5559.89, "total_tokens": 3822272} +{"current_steps": 7780, "total_steps": 37885, "loss": 0.1192, "lr": 1.9331792018558165e-06, "epoch": 1.026791606176587, "percentage": 20.54, "elapsed_time": "0:11:27", "remaining_time": "0:44:21", "throughput": 5560.5, "total_tokens": 3824512} +{"current_steps": 7785, "total_steps": 37885, "loss": 0.0616, "lr": 1.933013522658752e-06, "epoch": 1.0274514979543354, "percentage": 20.55, "elapsed_time": "0:11:28", "remaining_time": "0:44:20", "throughput": 5561.55, "total_tokens": 3827072} +{"current_steps": 7790, "total_steps": 37885, "loss": 0.1669, "lr": 1.9328476454363235e-06, "epoch": 1.028111389732084, "percentage": 20.56, "elapsed_time": "0:11:28", "remaining_time": "0:44:19", "throughput": 5562.62, "total_tokens": 3829632} +{"current_steps": 7795, "total_steps": 37885, "loss": 0.0014, "lr": 1.932681570223737e-06, "epoch": 1.0287712815098324, "percentage": 20.58, "elapsed_time": "0:11:28", "remaining_time": "0:44:18", "throughput": 5563.42, "total_tokens": 3832000} +{"current_steps": 7800, "total_steps": 37885, "loss": 0.0993, "lr": 1.9325152970562418e-06, "epoch": 1.029431173287581, "percentage": 20.59, "elapsed_time": "0:11:29", "remaining_time": "0:44:17", "throughput": 5564.55, "total_tokens": 3834624} +{"current_steps": 7805, "total_steps": 37885, "loss": 0.1106, "lr": 1.9323488259691273e-06, "epoch": 1.0300910650653292, "percentage": 20.6, "elapsed_time": "0:11:29", "remaining_time": "0:44:17", "throughput": 5565.34, "total_tokens": 3836992} +{"current_steps": 7810, "total_steps": 37885, "loss": 0.1535, "lr": 1.932182156997726e-06, "epoch": 1.0307509568430777, "percentage": 20.62, "elapsed_time": "0:11:29", "remaining_time": "0:44:16", "throughput": 5566.34, "total_tokens": 3839488} +{"current_steps": 7815, "total_steps": 37885, "loss": 0.0557, "lr": 1.9320152901774124e-06, "epoch": 1.0314108486208262, "percentage": 20.63, "elapsed_time": "0:11:30", "remaining_time": "0:44:15", "throughput": 5567.46, "total_tokens": 3842112} +{"current_steps": 7820, "total_steps": 37885, "loss": 0.0745, "lr": 1.9318482255436022e-06, "epoch": 1.0320707403985747, "percentage": 20.64, "elapsed_time": "0:11:30", "remaining_time": "0:44:14", "throughput": 5567.99, "total_tokens": 3844288} +{"current_steps": 7825, "total_steps": 37885, "loss": 0.1813, "lr": 1.9316809631317544e-06, "epoch": 1.032730632176323, "percentage": 20.65, "elapsed_time": "0:11:30", "remaining_time": "0:44:13", "throughput": 5568.79, "total_tokens": 3846656} +{"current_steps": 7830, "total_steps": 37885, "loss": 0.0014, "lr": 1.931513502977369e-06, "epoch": 1.0333905239540715, "percentage": 20.67, "elapsed_time": "0:11:31", "remaining_time": "0:44:12", "throughput": 5570.0, "total_tokens": 3849344} +{"current_steps": 7835, "total_steps": 37885, "loss": 0.0637, "lr": 1.931345845115988e-06, "epoch": 1.03405041573182, "percentage": 20.68, "elapsed_time": "0:11:31", "remaining_time": "0:44:11", "throughput": 5570.81, "total_tokens": 3851712} +{"current_steps": 7840, "total_steps": 37885, "loss": 0.0103, "lr": 1.931177989583195e-06, "epoch": 1.0347103075095685, "percentage": 20.69, "elapsed_time": "0:11:31", "remaining_time": "0:44:10", "throughput": 5571.59, "total_tokens": 3854080} +{"current_steps": 7845, "total_steps": 37885, "loss": 0.063, "lr": 1.9310099364146174e-06, "epoch": 1.0353701992873168, "percentage": 20.71, "elapsed_time": "0:11:32", "remaining_time": "0:44:10", "throughput": 5572.55, "total_tokens": 3856576} +{"current_steps": 7850, "total_steps": 37885, "loss": 0.1425, "lr": 1.930841685645922e-06, "epoch": 1.0360300910650653, "percentage": 20.72, "elapsed_time": "0:11:32", "remaining_time": "0:44:09", "throughput": 5573.44, "total_tokens": 3859008} +{"current_steps": 7855, "total_steps": 37885, "loss": 0.0006, "lr": 1.93067323731282e-06, "epoch": 1.0366899828428138, "percentage": 20.73, "elapsed_time": "0:11:32", "remaining_time": "0:44:08", "throughput": 5574.22, "total_tokens": 3861376} +{"current_steps": 7860, "total_steps": 37885, "loss": 0.0849, "lr": 1.930504591451063e-06, "epoch": 1.0373498746205623, "percentage": 20.75, "elapsed_time": "0:11:33", "remaining_time": "0:44:07", "throughput": 5575.21, "total_tokens": 3863872} +{"current_steps": 7865, "total_steps": 37885, "loss": 0.073, "lr": 1.9303357480964445e-06, "epoch": 1.0380097663983108, "percentage": 20.76, "elapsed_time": "0:11:33", "remaining_time": "0:44:06", "throughput": 5576.08, "total_tokens": 3866304} +{"current_steps": 7870, "total_steps": 37885, "loss": 0.0006, "lr": 1.9301667072848002e-06, "epoch": 1.038669658176059, "percentage": 20.77, "elapsed_time": "0:11:33", "remaining_time": "0:44:05", "throughput": 5577.13, "total_tokens": 3868864} +{"current_steps": 7875, "total_steps": 37885, "loss": 0.0896, "lr": 1.929997469052008e-06, "epoch": 1.0393295499538076, "percentage": 20.79, "elapsed_time": "0:11:34", "remaining_time": "0:44:04", "throughput": 5577.57, "total_tokens": 3870976} +{"current_steps": 7880, "total_steps": 37885, "loss": 0.132, "lr": 1.929828033433988e-06, "epoch": 1.039989441731556, "percentage": 20.8, "elapsed_time": "0:11:34", "remaining_time": "0:44:03", "throughput": 5578.44, "total_tokens": 3873408} +{"current_steps": 7885, "total_steps": 37885, "loss": 0.2669, "lr": 1.9296584004667005e-06, "epoch": 1.0406493335093046, "percentage": 20.81, "elapsed_time": "0:11:34", "remaining_time": "0:44:03", "throughput": 5579.21, "total_tokens": 3875776} +{"current_steps": 7890, "total_steps": 37885, "loss": 0.0612, "lr": 1.92948857018615e-06, "epoch": 1.0413092252870528, "percentage": 20.83, "elapsed_time": "0:11:35", "remaining_time": "0:44:02", "throughput": 5579.95, "total_tokens": 3878144} +{"current_steps": 7895, "total_steps": 37885, "loss": 0.1703, "lr": 1.929318542628381e-06, "epoch": 1.0419691170648013, "percentage": 20.84, "elapsed_time": "0:11:35", "remaining_time": "0:44:01", "throughput": 5580.74, "total_tokens": 3880512} +{"current_steps": 7900, "total_steps": 37885, "loss": 0.0021, "lr": 1.9291483178294813e-06, "epoch": 1.0426290088425498, "percentage": 20.85, "elapsed_time": "0:11:35", "remaining_time": "0:44:00", "throughput": 5581.51, "total_tokens": 3882880} +{"current_steps": 7905, "total_steps": 37885, "loss": 0.0301, "lr": 1.928977895825579e-06, "epoch": 1.0432889006202983, "percentage": 20.87, "elapsed_time": "0:11:35", "remaining_time": "0:43:59", "throughput": 5582.36, "total_tokens": 3885312} +{"current_steps": 7910, "total_steps": 37885, "loss": 0.0624, "lr": 1.928807276652846e-06, "epoch": 1.0439487923980466, "percentage": 20.88, "elapsed_time": "0:11:36", "remaining_time": "0:43:58", "throughput": 5583.22, "total_tokens": 3887744} +{"current_steps": 7915, "total_steps": 37885, "loss": 0.0541, "lr": 1.928636460347494e-06, "epoch": 1.044608684175795, "percentage": 20.89, "elapsed_time": "0:11:36", "remaining_time": "0:43:57", "throughput": 5583.91, "total_tokens": 3890048} +{"current_steps": 7920, "total_steps": 37885, "loss": 0.0159, "lr": 1.928465446945778e-06, "epoch": 1.0452685759535436, "percentage": 20.91, "elapsed_time": "0:11:36", "remaining_time": "0:43:56", "throughput": 5584.77, "total_tokens": 3892480} +{"current_steps": 7925, "total_steps": 37885, "loss": 0.0013, "lr": 1.9282942364839947e-06, "epoch": 1.045928467731292, "percentage": 20.92, "elapsed_time": "0:11:37", "remaining_time": "0:43:56", "throughput": 5585.45, "total_tokens": 3894784} +{"current_steps": 7930, "total_steps": 37885, "loss": 0.0775, "lr": 1.9281228289984816e-06, "epoch": 1.0465883595090406, "percentage": 20.93, "elapsed_time": "0:11:37", "remaining_time": "0:43:55", "throughput": 5586.63, "total_tokens": 3897472} +{"current_steps": 7935, "total_steps": 37885, "loss": 0.078, "lr": 1.927951224525619e-06, "epoch": 1.047248251286789, "percentage": 20.94, "elapsed_time": "0:11:37", "remaining_time": "0:43:54", "throughput": 5587.66, "total_tokens": 3900032} +{"current_steps": 7940, "total_steps": 37885, "loss": 0.0335, "lr": 1.9277794231018286e-06, "epoch": 1.0479081430645374, "percentage": 20.96, "elapsed_time": "0:11:38", "remaining_time": "0:43:53", "throughput": 5588.7, "total_tokens": 3902592} +{"current_steps": 7945, "total_steps": 37885, "loss": 0.0915, "lr": 1.927607424763574e-06, "epoch": 1.048568034842286, "percentage": 20.97, "elapsed_time": "0:11:38", "remaining_time": "0:43:52", "throughput": 5589.38, "total_tokens": 3904896} +{"current_steps": 7950, "total_steps": 37885, "loss": 0.0017, "lr": 1.927435229547361e-06, "epoch": 1.0492279266200344, "percentage": 20.98, "elapsed_time": "0:11:38", "remaining_time": "0:43:51", "throughput": 5590.05, "total_tokens": 3907200} +{"current_steps": 7955, "total_steps": 37885, "loss": 0.0768, "lr": 1.9272628374897366e-06, "epoch": 1.0498878183977827, "percentage": 21.0, "elapsed_time": "0:11:39", "remaining_time": "0:43:51", "throughput": 5591.21, "total_tokens": 3909888} +{"current_steps": 7960, "total_steps": 37885, "loss": 0.0445, "lr": 1.9270902486272892e-06, "epoch": 1.0505477101755312, "percentage": 21.01, "elapsed_time": "0:11:39", "remaining_time": "0:43:50", "throughput": 5592.01, "total_tokens": 3912320} +{"current_steps": 7965, "total_steps": 37885, "loss": 0.1083, "lr": 1.92691746299665e-06, "epoch": 1.0512076019532797, "percentage": 21.02, "elapsed_time": "0:11:39", "remaining_time": "0:43:49", "throughput": 5593.07, "total_tokens": 3914944} +{"current_steps": 7970, "total_steps": 37885, "loss": 0.0833, "lr": 1.9267444806344917e-06, "epoch": 1.0518674937310282, "percentage": 21.04, "elapsed_time": "0:11:40", "remaining_time": "0:43:48", "throughput": 5593.87, "total_tokens": 3917376} +{"current_steps": 7975, "total_steps": 37885, "loss": 0.0865, "lr": 1.9265713015775285e-06, "epoch": 1.0525273855087764, "percentage": 21.05, "elapsed_time": "0:11:40", "remaining_time": "0:43:47", "throughput": 5594.79, "total_tokens": 3919872} +{"current_steps": 7980, "total_steps": 37885, "loss": 0.1148, "lr": 1.926397925862516e-06, "epoch": 1.053187277286525, "percentage": 21.06, "elapsed_time": "0:11:40", "remaining_time": "0:43:46", "throughput": 5595.68, "total_tokens": 3922368} +{"current_steps": 7985, "total_steps": 37885, "loss": 0.0591, "lr": 1.9262243535262527e-06, "epoch": 1.0538471690642734, "percentage": 21.08, "elapsed_time": "0:11:41", "remaining_time": "0:43:46", "throughput": 5596.81, "total_tokens": 3925056} +{"current_steps": 7990, "total_steps": 37885, "loss": 0.2027, "lr": 1.926050584605577e-06, "epoch": 1.054507060842022, "percentage": 21.09, "elapsed_time": "0:11:41", "remaining_time": "0:43:45", "throughput": 5597.69, "total_tokens": 3927552} +{"current_steps": 7995, "total_steps": 37885, "loss": 0.0009, "lr": 1.9258766191373706e-06, "epoch": 1.0551669526197704, "percentage": 21.1, "elapsed_time": "0:11:41", "remaining_time": "0:43:44", "throughput": 5598.77, "total_tokens": 3930176} +{"current_steps": 8000, "total_steps": 37885, "loss": 0.0005, "lr": 1.9257024571585565e-06, "epoch": 1.0558268443975187, "percentage": 21.12, "elapsed_time": "0:11:42", "remaining_time": "0:43:43", "throughput": 5599.68, "total_tokens": 3932672} +{"current_steps": 8005, "total_steps": 37885, "loss": 0.2338, "lr": 1.9255280987060995e-06, "epoch": 1.0564867361752672, "percentage": 21.13, "elapsed_time": "0:11:42", "remaining_time": "0:43:42", "throughput": 5600.22, "total_tokens": 3934912} +{"current_steps": 8010, "total_steps": 37885, "loss": 0.0681, "lr": 1.9253535438170056e-06, "epoch": 1.0571466279530157, "percentage": 21.14, "elapsed_time": "0:11:42", "remaining_time": "0:43:41", "throughput": 5600.83, "total_tokens": 3937216} +{"current_steps": 8015, "total_steps": 37885, "loss": 0.145, "lr": 1.9251787925283228e-06, "epoch": 1.0578065197307642, "percentage": 21.16, "elapsed_time": "0:11:43", "remaining_time": "0:43:41", "throughput": 5601.79, "total_tokens": 3939776} +{"current_steps": 8020, "total_steps": 37885, "loss": 0.1157, "lr": 1.925003844877141e-06, "epoch": 1.0584664115085125, "percentage": 21.17, "elapsed_time": "0:11:43", "remaining_time": "0:43:40", "throughput": 5602.18, "total_tokens": 3941888} +{"current_steps": 8025, "total_steps": 37885, "loss": 0.0852, "lr": 1.9248287009005914e-06, "epoch": 1.059126303286261, "percentage": 21.18, "elapsed_time": "0:11:43", "remaining_time": "0:43:39", "throughput": 5602.83, "total_tokens": 3944192} +{"current_steps": 8030, "total_steps": 37885, "loss": 0.0706, "lr": 1.9246533606358475e-06, "epoch": 1.0597861950640095, "percentage": 21.2, "elapsed_time": "0:11:44", "remaining_time": "0:43:38", "throughput": 5603.85, "total_tokens": 3946816} +{"current_steps": 8035, "total_steps": 37885, "loss": 0.1036, "lr": 1.9244778241201232e-06, "epoch": 1.060446086841758, "percentage": 21.21, "elapsed_time": "0:11:44", "remaining_time": "0:43:37", "throughput": 5604.89, "total_tokens": 3949440} +{"current_steps": 8040, "total_steps": 37885, "loss": 0.0058, "lr": 1.9243020913906753e-06, "epoch": 1.0611059786195065, "percentage": 21.22, "elapsed_time": "0:11:44", "remaining_time": "0:43:36", "throughput": 5605.81, "total_tokens": 3952000} +{"current_steps": 8045, "total_steps": 37885, "loss": 0.0008, "lr": 1.924126162484802e-06, "epoch": 1.0617658703972548, "percentage": 21.24, "elapsed_time": "0:11:45", "remaining_time": "0:43:36", "throughput": 5606.31, "total_tokens": 3954240} +{"current_steps": 8050, "total_steps": 37885, "loss": 0.1045, "lr": 1.9239500374398427e-06, "epoch": 1.0624257621750033, "percentage": 21.25, "elapsed_time": "0:11:45", "remaining_time": "0:43:35", "throughput": 5607.01, "total_tokens": 3956608} +{"current_steps": 8055, "total_steps": 37885, "loss": 0.0012, "lr": 1.9237737162931785e-06, "epoch": 1.0630856539527518, "percentage": 21.26, "elapsed_time": "0:11:45", "remaining_time": "0:43:34", "throughput": 5607.89, "total_tokens": 3959104} +{"current_steps": 8060, "total_steps": 37885, "loss": 0.1183, "lr": 1.9235971990822323e-06, "epoch": 1.0637455457305003, "percentage": 21.27, "elapsed_time": "0:11:46", "remaining_time": "0:43:33", "throughput": 5608.87, "total_tokens": 3961664} +{"current_steps": 8065, "total_steps": 37885, "loss": 0.0608, "lr": 1.923420485844469e-06, "epoch": 1.0644054375082486, "percentage": 21.29, "elapsed_time": "0:11:46", "remaining_time": "0:43:32", "throughput": 5609.97, "total_tokens": 3964352} +{"current_steps": 8070, "total_steps": 37885, "loss": 0.0015, "lr": 1.9232435766173944e-06, "epoch": 1.065065329285997, "percentage": 21.3, "elapsed_time": "0:11:46", "remaining_time": "0:43:32", "throughput": 5610.59, "total_tokens": 3966656} +{"current_steps": 8075, "total_steps": 37885, "loss": 0.1006, "lr": 1.9230664714385567e-06, "epoch": 1.0657252210637456, "percentage": 21.31, "elapsed_time": "0:11:47", "remaining_time": "0:43:31", "throughput": 5611.08, "total_tokens": 3968896} +{"current_steps": 8080, "total_steps": 37885, "loss": 0.028, "lr": 1.922889170345544e-06, "epoch": 1.066385112841494, "percentage": 21.33, "elapsed_time": "0:11:47", "remaining_time": "0:43:30", "throughput": 5611.83, "total_tokens": 3971328} +{"current_steps": 8085, "total_steps": 37885, "loss": 0.1022, "lr": 1.9227116733759883e-06, "epoch": 1.0670450046192423, "percentage": 21.34, "elapsed_time": "0:11:48", "remaining_time": "0:43:29", "throughput": 5612.48, "total_tokens": 3973696} +{"current_steps": 8090, "total_steps": 37885, "loss": 0.1266, "lr": 1.922533980567562e-06, "epoch": 1.0677048963969908, "percentage": 21.35, "elapsed_time": "0:11:48", "remaining_time": "0:43:28", "throughput": 5613.34, "total_tokens": 3976192} +{"current_steps": 8095, "total_steps": 37885, "loss": 0.0802, "lr": 1.9223560919579782e-06, "epoch": 1.0683647881747393, "percentage": 21.37, "elapsed_time": "0:11:48", "remaining_time": "0:43:28", "throughput": 5614.51, "total_tokens": 3978944} +{"current_steps": 8100, "total_steps": 37885, "loss": 0.0701, "lr": 1.922178007584993e-06, "epoch": 1.0690246799524878, "percentage": 21.38, "elapsed_time": "0:11:49", "remaining_time": "0:43:27", "throughput": 5615.28, "total_tokens": 3981376} +{"current_steps": 8105, "total_steps": 37885, "loss": 0.0006, "lr": 1.921999727486404e-06, "epoch": 1.0696845717302363, "percentage": 21.39, "elapsed_time": "0:11:49", "remaining_time": "0:43:26", "throughput": 5615.95, "total_tokens": 3983744} +{"current_steps": 8110, "total_steps": 37885, "loss": 0.0604, "lr": 1.9218212517000495e-06, "epoch": 1.0703444635079846, "percentage": 21.41, "elapsed_time": "0:11:49", "remaining_time": "0:43:25", "throughput": 5616.56, "total_tokens": 3986048} +{"current_steps": 8115, "total_steps": 37885, "loss": 0.0162, "lr": 1.9216425802638095e-06, "epoch": 1.0710043552857331, "percentage": 21.42, "elapsed_time": "0:11:50", "remaining_time": "0:43:24", "throughput": 5617.7, "total_tokens": 3988736} +{"current_steps": 8120, "total_steps": 37885, "loss": 0.046, "lr": 1.9214637132156056e-06, "epoch": 1.0716642470634816, "percentage": 21.43, "elapsed_time": "0:11:50", "remaining_time": "0:43:23", "throughput": 5618.69, "total_tokens": 3991360} +{"current_steps": 8125, "total_steps": 37885, "loss": 0.0479, "lr": 1.9212846505934018e-06, "epoch": 1.0723241388412301, "percentage": 21.45, "elapsed_time": "0:11:50", "remaining_time": "0:43:23", "throughput": 5619.97, "total_tokens": 3994176} +{"current_steps": 8130, "total_steps": 37885, "loss": 0.0754, "lr": 1.921105392435202e-06, "epoch": 1.0729840306189784, "percentage": 21.46, "elapsed_time": "0:11:51", "remaining_time": "0:43:22", "throughput": 5620.46, "total_tokens": 3996416} +{"current_steps": 8135, "total_steps": 37885, "loss": 0.0023, "lr": 1.9209259387790526e-06, "epoch": 1.073643922396727, "percentage": 21.47, "elapsed_time": "0:11:51", "remaining_time": "0:43:21", "throughput": 5621.4, "total_tokens": 3998976} +{"current_steps": 8140, "total_steps": 37885, "loss": 0.1178, "lr": 1.920746289663042e-06, "epoch": 1.0743038141744754, "percentage": 21.49, "elapsed_time": "0:11:51", "remaining_time": "0:43:20", "throughput": 5622.07, "total_tokens": 4001344} +{"current_steps": 8145, "total_steps": 37885, "loss": 0.0384, "lr": 1.9205664451252986e-06, "epoch": 1.074963705952224, "percentage": 21.5, "elapsed_time": "0:11:52", "remaining_time": "0:43:19", "throughput": 5622.75, "total_tokens": 4003712} +{"current_steps": 8150, "total_steps": 37885, "loss": 0.0743, "lr": 1.9203864052039935e-06, "epoch": 1.0756235977299722, "percentage": 21.51, "elapsed_time": "0:11:52", "remaining_time": "0:43:19", "throughput": 5623.53, "total_tokens": 4006144} +{"current_steps": 8155, "total_steps": 37885, "loss": 0.0612, "lr": 1.9202061699373386e-06, "epoch": 1.0762834895077207, "percentage": 21.53, "elapsed_time": "0:11:52", "remaining_time": "0:43:18", "throughput": 5624.38, "total_tokens": 4008640} +{"current_steps": 8160, "total_steps": 37885, "loss": 0.0445, "lr": 1.9200257393635878e-06, "epoch": 1.0769433812854692, "percentage": 21.54, "elapsed_time": "0:11:53", "remaining_time": "0:43:17", "throughput": 5625.67, "total_tokens": 4011456} +{"current_steps": 8165, "total_steps": 37885, "loss": 0.0008, "lr": 1.9198451135210365e-06, "epoch": 1.0776032730632177, "percentage": 21.55, "elapsed_time": "0:11:53", "remaining_time": "0:43:16", "throughput": 5626.78, "total_tokens": 4014144} +{"current_steps": 8170, "total_steps": 37885, "loss": 0.0969, "lr": 1.919664292448021e-06, "epoch": 1.0782631648409662, "percentage": 21.57, "elapsed_time": "0:11:53", "remaining_time": "0:43:15", "throughput": 5627.2, "total_tokens": 4016320} +{"current_steps": 8175, "total_steps": 37885, "loss": 0.0018, "lr": 1.9194832761829184e-06, "epoch": 1.0789230566187145, "percentage": 21.58, "elapsed_time": "0:11:54", "remaining_time": "0:43:15", "throughput": 5628.33, "total_tokens": 4019008} +{"current_steps": 8180, "total_steps": 37885, "loss": 0.1604, "lr": 1.919302064764149e-06, "epoch": 1.079582948396463, "percentage": 21.59, "elapsed_time": "0:11:54", "remaining_time": "0:43:14", "throughput": 5629.27, "total_tokens": 4021568} +{"current_steps": 8185, "total_steps": 37885, "loss": 0.0614, "lr": 1.9191206582301737e-06, "epoch": 1.0802428401742115, "percentage": 21.6, "elapsed_time": "0:11:54", "remaining_time": "0:43:13", "throughput": 5630.14, "total_tokens": 4024064} +{"current_steps": 8190, "total_steps": 37885, "loss": 0.2029, "lr": 1.9189390566194942e-06, "epoch": 1.08090273195196, "percentage": 21.62, "elapsed_time": "0:11:55", "remaining_time": "0:43:12", "throughput": 5630.9, "total_tokens": 4026496} +{"current_steps": 8195, "total_steps": 37885, "loss": 0.0022, "lr": 1.9187572599706547e-06, "epoch": 1.0815626237297082, "percentage": 21.63, "elapsed_time": "0:11:55", "remaining_time": "0:43:11", "throughput": 5631.74, "total_tokens": 4028992} +{"current_steps": 8200, "total_steps": 37885, "loss": 0.2369, "lr": 1.9185752683222395e-06, "epoch": 1.0822225155074567, "percentage": 21.64, "elapsed_time": "0:11:55", "remaining_time": "0:43:11", "throughput": 5632.32, "total_tokens": 4031296} +{"current_steps": 8205, "total_steps": 37885, "loss": 0.0804, "lr": 1.9183930817128755e-06, "epoch": 1.0828824072852052, "percentage": 21.66, "elapsed_time": "0:11:56", "remaining_time": "0:43:10", "throughput": 5633.02, "total_tokens": 4033664} +{"current_steps": 8210, "total_steps": 37885, "loss": 0.0454, "lr": 1.9182107001812303e-06, "epoch": 1.0835422990629537, "percentage": 21.67, "elapsed_time": "0:11:56", "remaining_time": "0:43:09", "throughput": 5633.85, "total_tokens": 4036160} +{"current_steps": 8215, "total_steps": 37885, "loss": 0.0698, "lr": 1.9180281237660136e-06, "epoch": 1.0842021908407022, "percentage": 21.68, "elapsed_time": "0:11:56", "remaining_time": "0:43:08", "throughput": 5634.86, "total_tokens": 4038784} +{"current_steps": 8220, "total_steps": 37885, "loss": 0.0012, "lr": 1.917845352505975e-06, "epoch": 1.0848620826184505, "percentage": 21.7, "elapsed_time": "0:11:57", "remaining_time": "0:43:07", "throughput": 5635.73, "total_tokens": 4041280} +{"current_steps": 8225, "total_steps": 37885, "loss": 0.0258, "lr": 1.917662386439907e-06, "epoch": 1.085521974396199, "percentage": 21.71, "elapsed_time": "0:11:57", "remaining_time": "0:43:07", "throughput": 5636.53, "total_tokens": 4043712} +{"current_steps": 8230, "total_steps": 37885, "loss": 0.0015, "lr": 1.9174792256066427e-06, "epoch": 1.0861818661739475, "percentage": 21.72, "elapsed_time": "0:11:57", "remaining_time": "0:43:06", "throughput": 5636.87, "total_tokens": 4045824} +{"current_steps": 8235, "total_steps": 37885, "loss": 0.0688, "lr": 1.9172958700450565e-06, "epoch": 1.086841757951696, "percentage": 21.74, "elapsed_time": "0:11:58", "remaining_time": "0:43:05", "throughput": 5637.71, "total_tokens": 4048320} +{"current_steps": 8240, "total_steps": 37885, "loss": 0.0083, "lr": 1.9171123197940647e-06, "epoch": 1.0875016497294443, "percentage": 21.75, "elapsed_time": "0:11:58", "remaining_time": "0:43:04", "throughput": 5638.46, "total_tokens": 4050688} +{"current_steps": 8245, "total_steps": 37885, "loss": 0.0006, "lr": 1.916928574892624e-06, "epoch": 1.0881615415071928, "percentage": 21.76, "elapsed_time": "0:11:58", "remaining_time": "0:43:03", "throughput": 5639.5, "total_tokens": 4053312} +{"current_steps": 8250, "total_steps": 37885, "loss": 0.0955, "lr": 1.9167446353797334e-06, "epoch": 1.0888214332849413, "percentage": 21.78, "elapsed_time": "0:11:59", "remaining_time": "0:43:02", "throughput": 5640.49, "total_tokens": 4055872} +{"current_steps": 8255, "total_steps": 37885, "loss": 0.0006, "lr": 1.9165605012944322e-06, "epoch": 1.0894813250626898, "percentage": 21.79, "elapsed_time": "0:11:59", "remaining_time": "0:43:02", "throughput": 5641.3, "total_tokens": 4058304} +{"current_steps": 8260, "total_steps": 37885, "loss": 0.1845, "lr": 1.916376172675802e-06, "epoch": 1.090141216840438, "percentage": 21.8, "elapsed_time": "0:11:59", "remaining_time": "0:43:01", "throughput": 5642.2, "total_tokens": 4060800} +{"current_steps": 8265, "total_steps": 37885, "loss": 0.1351, "lr": 1.916191649562965e-06, "epoch": 1.0908011086181866, "percentage": 21.82, "elapsed_time": "0:12:00", "remaining_time": "0:43:00", "throughput": 5643.33, "total_tokens": 4063488} +{"current_steps": 8270, "total_steps": 37885, "loss": 0.0002, "lr": 1.9160069319950844e-06, "epoch": 1.091461000395935, "percentage": 21.83, "elapsed_time": "0:12:00", "remaining_time": "0:42:59", "throughput": 5644.47, "total_tokens": 4066176} +{"current_steps": 8275, "total_steps": 37885, "loss": 0.0815, "lr": 1.915822020011366e-06, "epoch": 1.0921208921736836, "percentage": 21.84, "elapsed_time": "0:12:00", "remaining_time": "0:42:58", "throughput": 5645.6, "total_tokens": 4068864} +{"current_steps": 8280, "total_steps": 37885, "loss": 0.0007, "lr": 1.915636913651056e-06, "epoch": 1.092780783951432, "percentage": 21.86, "elapsed_time": "0:12:01", "remaining_time": "0:42:58", "throughput": 5646.49, "total_tokens": 4071360} +{"current_steps": 8285, "total_steps": 37885, "loss": 0.0924, "lr": 1.9154516129534414e-06, "epoch": 1.0934406757291804, "percentage": 21.87, "elapsed_time": "0:12:01", "remaining_time": "0:42:57", "throughput": 5647.71, "total_tokens": 4074112} +{"current_steps": 8290, "total_steps": 37885, "loss": 0.0658, "lr": 1.915266117957851e-06, "epoch": 1.0941005675069289, "percentage": 21.88, "elapsed_time": "0:12:01", "remaining_time": "0:42:56", "throughput": 5648.43, "total_tokens": 4076480} +{"current_steps": 8295, "total_steps": 37885, "loss": 0.0017, "lr": 1.915080428703655e-06, "epoch": 1.0947604592846774, "percentage": 21.9, "elapsed_time": "0:12:02", "remaining_time": "0:42:55", "throughput": 5649.32, "total_tokens": 4078976} +{"current_steps": 8300, "total_steps": 37885, "loss": 0.1423, "lr": 1.9148945452302647e-06, "epoch": 1.0954203510624259, "percentage": 21.91, "elapsed_time": "0:12:02", "remaining_time": "0:42:54", "throughput": 5650.44, "total_tokens": 4081664} +{"current_steps": 8305, "total_steps": 37885, "loss": 0.1065, "lr": 1.9147084675771322e-06, "epoch": 1.0960802428401741, "percentage": 21.92, "elapsed_time": "0:12:02", "remaining_time": "0:42:54", "throughput": 5651.49, "total_tokens": 4084288} +{"current_steps": 8310, "total_steps": 37885, "loss": 0.0763, "lr": 1.9145221957837513e-06, "epoch": 1.0967401346179226, "percentage": 21.93, "elapsed_time": "0:12:03", "remaining_time": "0:42:53", "throughput": 5652.22, "total_tokens": 4086656} +{"current_steps": 8315, "total_steps": 37885, "loss": 0.066, "lr": 1.9143357298896564e-06, "epoch": 1.0974000263956711, "percentage": 21.95, "elapsed_time": "0:12:03", "remaining_time": "0:42:52", "throughput": 5653.09, "total_tokens": 4089152} +{"current_steps": 8320, "total_steps": 37885, "loss": 0.0539, "lr": 1.9141490699344243e-06, "epoch": 1.0980599181734196, "percentage": 21.96, "elapsed_time": "0:12:03", "remaining_time": "0:42:51", "throughput": 5653.89, "total_tokens": 4091584} +{"current_steps": 8325, "total_steps": 37885, "loss": 0.0029, "lr": 1.913962215957672e-06, "epoch": 1.098719809951168, "percentage": 21.97, "elapsed_time": "0:12:04", "remaining_time": "0:42:50", "throughput": 5654.51, "total_tokens": 4093888} +{"current_steps": 8330, "total_steps": 37885, "loss": 0.0022, "lr": 1.9137751679990576e-06, "epoch": 1.0993797017289164, "percentage": 21.99, "elapsed_time": "0:12:04", "remaining_time": "0:42:49", "throughput": 5655.24, "total_tokens": 4096256} +{"current_steps": 8335, "total_steps": 37885, "loss": 0.1069, "lr": 1.9135879260982806e-06, "epoch": 1.100039593506665, "percentage": 22.0, "elapsed_time": "0:12:04", "remaining_time": "0:42:49", "throughput": 5656.19, "total_tokens": 4098816} +{"current_steps": 8340, "total_steps": 37885, "loss": 0.1271, "lr": 1.9134004902950826e-06, "epoch": 1.1006994852844134, "percentage": 22.01, "elapsed_time": "0:12:04", "remaining_time": "0:42:48", "throughput": 5656.93, "total_tokens": 4101184} +{"current_steps": 8345, "total_steps": 37885, "loss": 0.1017, "lr": 1.913212860629244e-06, "epoch": 1.101359377062162, "percentage": 22.03, "elapsed_time": "0:12:05", "remaining_time": "0:42:47", "throughput": 5657.54, "total_tokens": 4103488} +{"current_steps": 8350, "total_steps": 37885, "loss": 0.1092, "lr": 1.9130250371405895e-06, "epoch": 1.1020192688399102, "percentage": 22.04, "elapsed_time": "0:12:05", "remaining_time": "0:42:46", "throughput": 5658.17, "total_tokens": 4105792} +{"current_steps": 8355, "total_steps": 37885, "loss": 0.0289, "lr": 1.912837019868982e-06, "epoch": 1.1026791606176587, "percentage": 22.05, "elapsed_time": "0:12:05", "remaining_time": "0:42:45", "throughput": 5659.19, "total_tokens": 4108416} +{"current_steps": 8360, "total_steps": 37885, "loss": 0.0011, "lr": 1.9126488088543273e-06, "epoch": 1.1033390523954072, "percentage": 22.07, "elapsed_time": "0:12:06", "remaining_time": "0:42:45", "throughput": 5660.16, "total_tokens": 4110976} +{"current_steps": 8365, "total_steps": 37885, "loss": 0.1147, "lr": 1.912460404136572e-06, "epoch": 1.1039989441731557, "percentage": 22.08, "elapsed_time": "0:12:06", "remaining_time": "0:42:44", "throughput": 5661.08, "total_tokens": 4113536} +{"current_steps": 8370, "total_steps": 37885, "loss": 0.1414, "lr": 1.912271805755703e-06, "epoch": 1.104658835950904, "percentage": 22.09, "elapsed_time": "0:12:06", "remaining_time": "0:42:43", "throughput": 5661.72, "total_tokens": 4115840} +{"current_steps": 8375, "total_steps": 37885, "loss": 0.0527, "lr": 1.9120830137517498e-06, "epoch": 1.1053187277286525, "percentage": 22.11, "elapsed_time": "0:12:07", "remaining_time": "0:42:42", "throughput": 5662.43, "total_tokens": 4118208} +{"current_steps": 8380, "total_steps": 37885, "loss": 0.0102, "lr": 1.9118940281647816e-06, "epoch": 1.105978619506401, "percentage": 22.12, "elapsed_time": "0:12:07", "remaining_time": "0:42:41", "throughput": 5662.96, "total_tokens": 4120448} +{"current_steps": 8385, "total_steps": 37885, "loss": 0.1928, "lr": 1.9117048490349096e-06, "epoch": 1.1066385112841495, "percentage": 22.13, "elapsed_time": "0:12:07", "remaining_time": "0:42:41", "throughput": 5663.34, "total_tokens": 4122560} +{"current_steps": 8390, "total_steps": 37885, "loss": 0.0306, "lr": 1.9115154764022852e-06, "epoch": 1.1072984030618978, "percentage": 22.15, "elapsed_time": "0:12:08", "remaining_time": "0:42:40", "throughput": 5664.28, "total_tokens": 4125120} +{"current_steps": 8395, "total_steps": 37885, "loss": 0.0867, "lr": 1.9113259103071015e-06, "epoch": 1.1079582948396463, "percentage": 22.16, "elapsed_time": "0:12:08", "remaining_time": "0:42:39", "throughput": 5664.8, "total_tokens": 4127360} +{"current_steps": 8400, "total_steps": 37885, "loss": 0.1242, "lr": 1.9111361507895925e-06, "epoch": 1.1086181866173948, "percentage": 22.17, "elapsed_time": "0:12:08", "remaining_time": "0:42:38", "throughput": 5665.44, "total_tokens": 4129664} +{"current_steps": 8405, "total_steps": 37885, "loss": 0.082, "lr": 1.9109461978900342e-06, "epoch": 1.1092780783951433, "percentage": 22.19, "elapsed_time": "0:12:09", "remaining_time": "0:42:37", "throughput": 5666.16, "total_tokens": 4132032} +{"current_steps": 8410, "total_steps": 37885, "loss": 0.0745, "lr": 1.910756051648741e-06, "epoch": 1.1099379701728918, "percentage": 22.2, "elapsed_time": "0:12:09", "remaining_time": "0:42:36", "throughput": 5667.04, "total_tokens": 4134528} +{"current_steps": 8415, "total_steps": 37885, "loss": 0.001, "lr": 1.9105657121060715e-06, "epoch": 1.11059786195064, "percentage": 22.21, "elapsed_time": "0:12:09", "remaining_time": "0:42:36", "throughput": 5668.23, "total_tokens": 4137280} +{"current_steps": 8420, "total_steps": 37885, "loss": 0.0026, "lr": 1.9103751793024236e-06, "epoch": 1.1112577537283885, "percentage": 22.23, "elapsed_time": "0:12:10", "remaining_time": "0:42:35", "throughput": 5669.09, "total_tokens": 4139776} +{"current_steps": 8425, "total_steps": 37885, "loss": 0.0961, "lr": 1.9101844532782357e-06, "epoch": 1.111917645506137, "percentage": 22.24, "elapsed_time": "0:12:10", "remaining_time": "0:42:34", "throughput": 5669.81, "total_tokens": 4142144} +{"current_steps": 8430, "total_steps": 37885, "loss": 0.0057, "lr": 1.909993534073989e-06, "epoch": 1.1125775372838855, "percentage": 22.25, "elapsed_time": "0:12:10", "remaining_time": "0:42:33", "throughput": 5670.84, "total_tokens": 4144768} +{"current_steps": 8435, "total_steps": 37885, "loss": 0.0725, "lr": 1.9098024217302043e-06, "epoch": 1.1132374290616338, "percentage": 22.26, "elapsed_time": "0:12:11", "remaining_time": "0:42:32", "throughput": 5671.38, "total_tokens": 4147008} +{"current_steps": 8440, "total_steps": 37885, "loss": 0.0589, "lr": 1.909611116287444e-06, "epoch": 1.1138973208393823, "percentage": 22.28, "elapsed_time": "0:12:11", "remaining_time": "0:42:32", "throughput": 5671.58, "total_tokens": 4148992} +{"current_steps": 8445, "total_steps": 37885, "loss": 0.0708, "lr": 1.909419617786311e-06, "epoch": 1.1145572126171308, "percentage": 22.29, "elapsed_time": "0:12:11", "remaining_time": "0:42:31", "throughput": 5672.53, "total_tokens": 4151552} +{"current_steps": 8450, "total_steps": 37885, "loss": 0.1095, "lr": 1.90922792626745e-06, "epoch": 1.1152171043948793, "percentage": 22.3, "elapsed_time": "0:12:12", "remaining_time": "0:42:30", "throughput": 5673.53, "total_tokens": 4154176} +{"current_steps": 8455, "total_steps": 37885, "loss": 0.1106, "lr": 1.9090360417715454e-06, "epoch": 1.1158769961726276, "percentage": 22.32, "elapsed_time": "0:12:12", "remaining_time": "0:42:29", "throughput": 5674.47, "total_tokens": 4156736} +{"current_steps": 8460, "total_steps": 37885, "loss": 0.0834, "lr": 1.9088439643393236e-06, "epoch": 1.116536887950376, "percentage": 22.33, "elapsed_time": "0:12:12", "remaining_time": "0:42:28", "throughput": 5675.02, "total_tokens": 4158976} +{"current_steps": 8465, "total_steps": 37885, "loss": 0.001, "lr": 1.9086516940115518e-06, "epoch": 1.1171967797281246, "percentage": 22.34, "elapsed_time": "0:12:13", "remaining_time": "0:42:28", "throughput": 5675.63, "total_tokens": 4161280} +{"current_steps": 8470, "total_steps": 37885, "loss": 0.0794, "lr": 1.908459230829038e-06, "epoch": 1.117856671505873, "percentage": 22.36, "elapsed_time": "0:12:13", "remaining_time": "0:42:27", "throughput": 5676.48, "total_tokens": 4163776} +{"current_steps": 8475, "total_steps": 37885, "loss": 0.1537, "lr": 1.908266574832631e-06, "epoch": 1.1185165632836216, "percentage": 22.37, "elapsed_time": "0:12:13", "remaining_time": "0:42:26", "throughput": 5677.41, "total_tokens": 4166336} +{"current_steps": 8480, "total_steps": 37885, "loss": 0.1069, "lr": 1.90807372606322e-06, "epoch": 1.1191764550613699, "percentage": 22.38, "elapsed_time": "0:12:14", "remaining_time": "0:42:25", "throughput": 5678.27, "total_tokens": 4168832} +{"current_steps": 8485, "total_steps": 37885, "loss": 0.0022, "lr": 1.9078806845617372e-06, "epoch": 1.1198363468391184, "percentage": 22.4, "elapsed_time": "0:12:14", "remaining_time": "0:42:25", "throughput": 5679.36, "total_tokens": 4171520} +{"current_steps": 8490, "total_steps": 37885, "loss": 0.0587, "lr": 1.907687450369153e-06, "epoch": 1.1204962386168669, "percentage": 22.41, "elapsed_time": "0:12:14", "remaining_time": "0:42:24", "throughput": 5680.43, "total_tokens": 4174208} +{"current_steps": 8495, "total_steps": 37885, "loss": 0.1138, "lr": 1.9074940235264805e-06, "epoch": 1.1211561303946154, "percentage": 22.42, "elapsed_time": "0:12:15", "remaining_time": "0:42:23", "throughput": 5681.03, "total_tokens": 4176512} +{"current_steps": 8500, "total_steps": 37885, "loss": 0.0606, "lr": 1.9073004040747732e-06, "epoch": 1.1218160221723636, "percentage": 22.44, "elapsed_time": "0:12:15", "remaining_time": "0:42:22", "throughput": 5681.96, "total_tokens": 4179072} +{"current_steps": 8505, "total_steps": 37885, "loss": 0.0689, "lr": 1.9071065920551254e-06, "epoch": 1.1224759139501121, "percentage": 22.45, "elapsed_time": "0:12:15", "remaining_time": "0:42:21", "throughput": 5682.82, "total_tokens": 4181568} +{"current_steps": 8510, "total_steps": 37885, "loss": 0.0378, "lr": 1.906912587508672e-06, "epoch": 1.1231358057278606, "percentage": 22.46, "elapsed_time": "0:12:16", "remaining_time": "0:42:21", "throughput": 5683.6, "total_tokens": 4184000} +{"current_steps": 8515, "total_steps": 37885, "loss": 0.0591, "lr": 1.9067183904765893e-06, "epoch": 1.1237956975056091, "percentage": 22.48, "elapsed_time": "0:12:16", "remaining_time": "0:42:20", "throughput": 5684.11, "total_tokens": 4186240} +{"current_steps": 8520, "total_steps": 37885, "loss": 0.0016, "lr": 1.9065240010000942e-06, "epoch": 1.1244555892833574, "percentage": 22.49, "elapsed_time": "0:12:16", "remaining_time": "0:42:19", "throughput": 5684.71, "total_tokens": 4188544} +{"current_steps": 8525, "total_steps": 37885, "loss": 0.1241, "lr": 1.9063294191204442e-06, "epoch": 1.125115481061106, "percentage": 22.5, "elapsed_time": "0:12:17", "remaining_time": "0:42:18", "throughput": 5685.72, "total_tokens": 4191168} +{"current_steps": 8530, "total_steps": 37885, "loss": 0.2255, "lr": 1.9061346448789383e-06, "epoch": 1.1257753728388544, "percentage": 22.52, "elapsed_time": "0:12:17", "remaining_time": "0:42:17", "throughput": 5686.97, "total_tokens": 4193984} +{"current_steps": 8535, "total_steps": 37885, "loss": 0.001, "lr": 1.9059396783169157e-06, "epoch": 1.126435264616603, "percentage": 22.53, "elapsed_time": "0:12:17", "remaining_time": "0:42:17", "throughput": 5688.35, "total_tokens": 4196928} +{"current_steps": 8540, "total_steps": 37885, "loss": 0.0009, "lr": 1.9057445194757566e-06, "epoch": 1.1270951563943514, "percentage": 22.54, "elapsed_time": "0:12:18", "remaining_time": "0:42:16", "throughput": 5689.2, "total_tokens": 4199424} +{"current_steps": 8545, "total_steps": 37885, "loss": 0.1215, "lr": 1.9055491683968822e-06, "epoch": 1.1277550481720997, "percentage": 22.56, "elapsed_time": "0:12:18", "remaining_time": "0:42:15", "throughput": 5689.63, "total_tokens": 4201600} +{"current_steps": 8550, "total_steps": 37885, "loss": 0.0429, "lr": 1.9053536251217544e-06, "epoch": 1.1284149399498482, "percentage": 22.57, "elapsed_time": "0:12:18", "remaining_time": "0:42:14", "throughput": 5690.3, "total_tokens": 4203968} +{"current_steps": 8555, "total_steps": 37885, "loss": 0.0003, "lr": 1.9051578896918756e-06, "epoch": 1.1290748317275967, "percentage": 22.58, "elapsed_time": "0:12:19", "remaining_time": "0:42:14", "throughput": 5690.98, "total_tokens": 4206336} +{"current_steps": 8560, "total_steps": 37885, "loss": 0.0588, "lr": 1.9049619621487894e-06, "epoch": 1.1297347235053452, "percentage": 22.59, "elapsed_time": "0:12:19", "remaining_time": "0:42:13", "throughput": 5691.98, "total_tokens": 4208960} +{"current_steps": 8565, "total_steps": 37885, "loss": 0.0102, "lr": 1.9047658425340798e-06, "epoch": 1.1303946152830935, "percentage": 22.61, "elapsed_time": "0:12:19", "remaining_time": "0:42:12", "throughput": 5692.5, "total_tokens": 4211200} +{"current_steps": 8570, "total_steps": 37885, "loss": 0.0002, "lr": 1.904569530889372e-06, "epoch": 1.131054507060842, "percentage": 22.62, "elapsed_time": "0:12:20", "remaining_time": "0:42:11", "throughput": 5693.51, "total_tokens": 4213824} +{"current_steps": 8575, "total_steps": 37885, "loss": 0.0023, "lr": 1.9043730272563319e-06, "epoch": 1.1317143988385905, "percentage": 22.63, "elapsed_time": "0:12:20", "remaining_time": "0:42:10", "throughput": 5694.18, "total_tokens": 4216192} +{"current_steps": 8580, "total_steps": 37885, "loss": 0.0643, "lr": 1.9041763316766653e-06, "epoch": 1.132374290616339, "percentage": 22.65, "elapsed_time": "0:12:20", "remaining_time": "0:42:10", "throughput": 5694.52, "total_tokens": 4218304} +{"current_steps": 8585, "total_steps": 37885, "loss": 0.0723, "lr": 1.90397944419212e-06, "epoch": 1.1330341823940873, "percentage": 22.66, "elapsed_time": "0:12:21", "remaining_time": "0:42:09", "throughput": 5695.14, "total_tokens": 4220608} +{"current_steps": 8590, "total_steps": 37885, "loss": 0.1629, "lr": 1.9037823648444839e-06, "epoch": 1.1336940741718358, "percentage": 22.67, "elapsed_time": "0:12:21", "remaining_time": "0:42:08", "throughput": 5695.87, "total_tokens": 4223040} +{"current_steps": 8595, "total_steps": 37885, "loss": 0.0002, "lr": 1.9035850936755855e-06, "epoch": 1.1343539659495843, "percentage": 22.69, "elapsed_time": "0:12:21", "remaining_time": "0:42:07", "throughput": 5696.89, "total_tokens": 4225664} +{"current_steps": 8600, "total_steps": 37885, "loss": 0.1971, "lr": 1.9033876307272941e-06, "epoch": 1.1350138577273328, "percentage": 22.7, "elapsed_time": "0:12:22", "remaining_time": "0:42:06", "throughput": 5697.83, "total_tokens": 4228224} +{"current_steps": 8605, "total_steps": 37885, "loss": 0.0001, "lr": 1.9031899760415198e-06, "epoch": 1.1356737495050813, "percentage": 22.71, "elapsed_time": "0:12:22", "remaining_time": "0:42:06", "throughput": 5698.76, "total_tokens": 4230784} +{"current_steps": 8610, "total_steps": 37885, "loss": 0.1488, "lr": 1.9029921296602139e-06, "epoch": 1.1363336412828295, "percentage": 22.73, "elapsed_time": "0:12:22", "remaining_time": "0:42:05", "throughput": 5699.52, "total_tokens": 4233216} +{"current_steps": 8615, "total_steps": 37885, "loss": 0.0005, "lr": 1.9027940916253668e-06, "epoch": 1.136993533060578, "percentage": 22.74, "elapsed_time": "0:12:23", "remaining_time": "0:42:04", "throughput": 5700.21, "total_tokens": 4235584} +{"current_steps": 8620, "total_steps": 37885, "loss": 0.112, "lr": 1.9025958619790118e-06, "epoch": 1.1376534248383265, "percentage": 22.75, "elapsed_time": "0:12:23", "remaining_time": "0:42:03", "throughput": 5700.88, "total_tokens": 4237952} +{"current_steps": 8625, "total_steps": 37885, "loss": 0.0003, "lr": 1.902397440763221e-06, "epoch": 1.138313316616075, "percentage": 22.77, "elapsed_time": "0:12:23", "remaining_time": "0:42:03", "throughput": 5702.11, "total_tokens": 4240768} +{"current_steps": 8630, "total_steps": 37885, "loss": 0.2625, "lr": 1.9021988280201083e-06, "epoch": 1.1389732083938233, "percentage": 22.78, "elapsed_time": "0:12:24", "remaining_time": "0:42:02", "throughput": 5702.66, "total_tokens": 4243072} +{"current_steps": 8635, "total_steps": 37885, "loss": 0.003, "lr": 1.9020000237918273e-06, "epoch": 1.1396331001715718, "percentage": 22.79, "elapsed_time": "0:12:24", "remaining_time": "0:42:01", "throughput": 5703.57, "total_tokens": 4245632} +{"current_steps": 8640, "total_steps": 37885, "loss": 0.0461, "lr": 1.9018010281205727e-06, "epoch": 1.1402929919493203, "percentage": 22.81, "elapsed_time": "0:12:24", "remaining_time": "0:42:00", "throughput": 5704.35, "total_tokens": 4248064} +{"current_steps": 8645, "total_steps": 37885, "loss": 0.0676, "lr": 1.9016018410485809e-06, "epoch": 1.1409528837270688, "percentage": 22.82, "elapsed_time": "0:12:25", "remaining_time": "0:41:59", "throughput": 5705.08, "total_tokens": 4250496} +{"current_steps": 8650, "total_steps": 37885, "loss": 0.3759, "lr": 1.901402462618127e-06, "epoch": 1.141612775504817, "percentage": 22.83, "elapsed_time": "0:12:25", "remaining_time": "0:41:59", "throughput": 5706.07, "total_tokens": 4253120} +{"current_steps": 8655, "total_steps": 37885, "loss": 0.2103, "lr": 1.9012028928715272e-06, "epoch": 1.1422726672825656, "percentage": 22.85, "elapsed_time": "0:12:25", "remaining_time": "0:41:58", "throughput": 5706.97, "total_tokens": 4255680} +{"current_steps": 8660, "total_steps": 37885, "loss": 0.0168, "lr": 1.9010031318511401e-06, "epoch": 1.142932559060314, "percentage": 22.86, "elapsed_time": "0:12:26", "remaining_time": "0:41:57", "throughput": 5707.57, "total_tokens": 4257984} +{"current_steps": 8665, "total_steps": 37885, "loss": 0.0013, "lr": 1.9008031795993627e-06, "epoch": 1.1435924508380626, "percentage": 22.87, "elapsed_time": "0:12:26", "remaining_time": "0:41:56", "throughput": 5708.1, "total_tokens": 4260224} +{"current_steps": 8670, "total_steps": 37885, "loss": 0.0495, "lr": 1.9006030361586337e-06, "epoch": 1.144252342615811, "percentage": 22.89, "elapsed_time": "0:12:26", "remaining_time": "0:41:56", "throughput": 5708.85, "total_tokens": 4262656} +{"current_steps": 8675, "total_steps": 37885, "loss": 0.0547, "lr": 1.9004027015714315e-06, "epoch": 1.1449122343935594, "percentage": 22.9, "elapsed_time": "0:12:27", "remaining_time": "0:41:55", "throughput": 5709.44, "total_tokens": 4264960} +{"current_steps": 8680, "total_steps": 37885, "loss": 0.0832, "lr": 1.9002021758802762e-06, "epoch": 1.1455721261713079, "percentage": 22.91, "elapsed_time": "0:12:27", "remaining_time": "0:41:54", "throughput": 5710.26, "total_tokens": 4267456} +{"current_steps": 8685, "total_steps": 37885, "loss": 0.0731, "lr": 1.900001459127728e-06, "epoch": 1.1462320179490564, "percentage": 22.92, "elapsed_time": "0:12:27", "remaining_time": "0:41:53", "throughput": 5710.86, "total_tokens": 4269760} +{"current_steps": 8690, "total_steps": 37885, "loss": 0.031, "lr": 1.8998005513563872e-06, "epoch": 1.1468919097268049, "percentage": 22.94, "elapsed_time": "0:12:27", "remaining_time": "0:41:52", "throughput": 5711.86, "total_tokens": 4272384} +{"current_steps": 8695, "total_steps": 37885, "loss": 0.0028, "lr": 1.8995994526088955e-06, "epoch": 1.1475518015045532, "percentage": 22.95, "elapsed_time": "0:12:28", "remaining_time": "0:41:52", "throughput": 5712.84, "total_tokens": 4275008} +{"current_steps": 8700, "total_steps": 37885, "loss": 0.014, "lr": 1.8993981629279342e-06, "epoch": 1.1482116932823017, "percentage": 22.96, "elapsed_time": "0:12:28", "remaining_time": "0:41:51", "throughput": 5713.58, "total_tokens": 4277440} +{"current_steps": 8705, "total_steps": 37885, "loss": 0.0003, "lr": 1.8991966823562258e-06, "epoch": 1.1488715850600502, "percentage": 22.98, "elapsed_time": "0:12:28", "remaining_time": "0:41:50", "throughput": 5714.17, "total_tokens": 4279744} +{"current_steps": 8710, "total_steps": 37885, "loss": 0.1334, "lr": 1.8989950109365328e-06, "epoch": 1.1495314768377987, "percentage": 22.99, "elapsed_time": "0:12:29", "remaining_time": "0:41:49", "throughput": 5714.77, "total_tokens": 4282048} +{"current_steps": 8715, "total_steps": 37885, "loss": 0.0581, "lr": 1.8987931487116591e-06, "epoch": 1.150191368615547, "percentage": 23.0, "elapsed_time": "0:12:29", "remaining_time": "0:41:49", "throughput": 5715.27, "total_tokens": 4284288} +{"current_steps": 8720, "total_steps": 37885, "loss": 0.0003, "lr": 1.898591095724448e-06, "epoch": 1.1508512603932954, "percentage": 23.02, "elapsed_time": "0:12:29", "remaining_time": "0:41:48", "throughput": 5716.3, "total_tokens": 4286976} +{"current_steps": 8725, "total_steps": 37885, "loss": 0.0615, "lr": 1.898388852017784e-06, "epoch": 1.151511152171044, "percentage": 23.03, "elapsed_time": "0:12:30", "remaining_time": "0:41:47", "throughput": 5717.11, "total_tokens": 4289472} +{"current_steps": 8730, "total_steps": 37885, "loss": 0.0752, "lr": 1.8981864176345914e-06, "epoch": 1.1521710439487924, "percentage": 23.04, "elapsed_time": "0:12:30", "remaining_time": "0:41:46", "throughput": 5718.15, "total_tokens": 4292160} +{"current_steps": 8735, "total_steps": 37885, "loss": 0.1534, "lr": 1.8979837926178362e-06, "epoch": 1.152830935726541, "percentage": 23.06, "elapsed_time": "0:12:30", "remaining_time": "0:41:46", "throughput": 5718.82, "total_tokens": 4294528} +{"current_steps": 8740, "total_steps": 37885, "loss": 0.0736, "lr": 1.8977809770105235e-06, "epoch": 1.1534908275042892, "percentage": 23.07, "elapsed_time": "0:12:31", "remaining_time": "0:41:45", "throughput": 5719.78, "total_tokens": 4297152} +{"current_steps": 8745, "total_steps": 37885, "loss": 0.0011, "lr": 1.8975779708556998e-06, "epoch": 1.1541507192820377, "percentage": 23.08, "elapsed_time": "0:12:31", "remaining_time": "0:41:44", "throughput": 5720.96, "total_tokens": 4299968} +{"current_steps": 8750, "total_steps": 37885, "loss": 0.0063, "lr": 1.8973747741964515e-06, "epoch": 1.1548106110597862, "percentage": 23.1, "elapsed_time": "0:12:31", "remaining_time": "0:41:43", "throughput": 5721.36, "total_tokens": 4302144} +{"current_steps": 8755, "total_steps": 37885, "loss": 0.0892, "lr": 1.8971713870759057e-06, "epoch": 1.1554705028375347, "percentage": 23.11, "elapsed_time": "0:12:32", "remaining_time": "0:41:42", "throughput": 5722.1, "total_tokens": 4304576} +{"current_steps": 8760, "total_steps": 37885, "loss": 0.0785, "lr": 1.8969678095372296e-06, "epoch": 1.156130394615283, "percentage": 23.12, "elapsed_time": "0:12:32", "remaining_time": "0:41:42", "throughput": 5722.62, "total_tokens": 4306816} +{"current_steps": 8765, "total_steps": 37885, "loss": 0.1083, "lr": 1.8967640416236313e-06, "epoch": 1.1567902863930315, "percentage": 23.14, "elapsed_time": "0:12:32", "remaining_time": "0:41:41", "throughput": 5723.57, "total_tokens": 4309440} +{"current_steps": 8770, "total_steps": 37885, "loss": 0.121, "lr": 1.8965600833783594e-06, "epoch": 1.15745017817078, "percentage": 23.15, "elapsed_time": "0:12:33", "remaining_time": "0:41:40", "throughput": 5724.46, "total_tokens": 4312000} +{"current_steps": 8775, "total_steps": 37885, "loss": 0.1331, "lr": 1.8963559348447015e-06, "epoch": 1.1581100699485285, "percentage": 23.16, "elapsed_time": "0:12:33", "remaining_time": "0:41:39", "throughput": 5725.18, "total_tokens": 4314432} +{"current_steps": 8780, "total_steps": 37885, "loss": 0.0048, "lr": 1.8961515960659878e-06, "epoch": 1.1587699617262768, "percentage": 23.18, "elapsed_time": "0:12:33", "remaining_time": "0:41:39", "throughput": 5726.0, "total_tokens": 4316928} +{"current_steps": 8785, "total_steps": 37885, "loss": 0.1, "lr": 1.8959470670855873e-06, "epoch": 1.1594298535040253, "percentage": 23.19, "elapsed_time": "0:12:34", "remaining_time": "0:41:38", "throughput": 5726.49, "total_tokens": 4319168} +{"current_steps": 8790, "total_steps": 37885, "loss": 0.1709, "lr": 1.8957423479469095e-06, "epoch": 1.1600897452817738, "percentage": 23.2, "elapsed_time": "0:12:34", "remaining_time": "0:41:37", "throughput": 5726.91, "total_tokens": 4321344} +{"current_steps": 8795, "total_steps": 37885, "loss": 0.0571, "lr": 1.8955374386934049e-06, "epoch": 1.1607496370595223, "percentage": 23.21, "elapsed_time": "0:12:34", "remaining_time": "0:41:36", "throughput": 5727.79, "total_tokens": 4323904} +{"current_steps": 8800, "total_steps": 37885, "loss": 0.0995, "lr": 1.895332339368564e-06, "epoch": 1.1614095288372708, "percentage": 23.23, "elapsed_time": "0:12:35", "remaining_time": "0:41:36", "throughput": 5728.46, "total_tokens": 4326272} +{"current_steps": 8805, "total_steps": 37885, "loss": 0.0573, "lr": 1.8951270500159176e-06, "epoch": 1.162069420615019, "percentage": 23.24, "elapsed_time": "0:12:35", "remaining_time": "0:41:35", "throughput": 5729.58, "total_tokens": 4329024} +{"current_steps": 8810, "total_steps": 37885, "loss": 0.0971, "lr": 1.8949215706790364e-06, "epoch": 1.1627293123927676, "percentage": 23.25, "elapsed_time": "0:12:35", "remaining_time": "0:41:34", "throughput": 5730.16, "total_tokens": 4331328} +{"current_steps": 8815, "total_steps": 37885, "loss": 0.1817, "lr": 1.8947159014015326e-06, "epoch": 1.163389204170516, "percentage": 23.27, "elapsed_time": "0:12:36", "remaining_time": "0:41:33", "throughput": 5730.81, "total_tokens": 4333696} +{"current_steps": 8820, "total_steps": 37885, "loss": 0.1063, "lr": 1.8945100422270578e-06, "epoch": 1.1640490959482646, "percentage": 23.28, "elapsed_time": "0:12:36", "remaining_time": "0:41:33", "throughput": 5731.77, "total_tokens": 4336320} +{"current_steps": 8825, "total_steps": 37885, "loss": 0.0024, "lr": 1.8943039931993043e-06, "epoch": 1.164708987726013, "percentage": 23.29, "elapsed_time": "0:12:36", "remaining_time": "0:41:32", "throughput": 5732.43, "total_tokens": 4338688} +{"current_steps": 8830, "total_steps": 37885, "loss": 0.078, "lr": 1.8940977543620038e-06, "epoch": 1.1653688795037613, "percentage": 23.31, "elapsed_time": "0:12:37", "remaining_time": "0:41:31", "throughput": 5733.39, "total_tokens": 4341312} +{"current_steps": 8835, "total_steps": 37885, "loss": 0.0008, "lr": 1.89389132575893e-06, "epoch": 1.1660287712815098, "percentage": 23.32, "elapsed_time": "0:12:37", "remaining_time": "0:41:30", "throughput": 5734.21, "total_tokens": 4343808} +{"current_steps": 8840, "total_steps": 37885, "loss": 0.0904, "lr": 1.8936847074338948e-06, "epoch": 1.1666886630592583, "percentage": 23.33, "elapsed_time": "0:12:37", "remaining_time": "0:41:30", "throughput": 5734.85, "total_tokens": 4346176} +{"current_steps": 8845, "total_steps": 37885, "loss": 0.0682, "lr": 1.8934778994307526e-06, "epoch": 1.1673485548370066, "percentage": 23.35, "elapsed_time": "0:12:38", "remaining_time": "0:41:29", "throughput": 5735.67, "total_tokens": 4348672} +{"current_steps": 8850, "total_steps": 37885, "loss": 0.1466, "lr": 1.8932709017933958e-06, "epoch": 1.1680084466147551, "percentage": 23.36, "elapsed_time": "0:12:38", "remaining_time": "0:41:28", "throughput": 5736.25, "total_tokens": 4350976} +{"current_steps": 8855, "total_steps": 37885, "loss": 0.0004, "lr": 1.8930637145657592e-06, "epoch": 1.1686683383925036, "percentage": 23.37, "elapsed_time": "0:12:38", "remaining_time": "0:41:27", "throughput": 5737.14, "total_tokens": 4353536} +{"current_steps": 8860, "total_steps": 37885, "loss": 0.1143, "lr": 1.8928563377918157e-06, "epoch": 1.1693282301702521, "percentage": 23.39, "elapsed_time": "0:12:39", "remaining_time": "0:41:26", "throughput": 5737.55, "total_tokens": 4355712} +{"current_steps": 8865, "total_steps": 37885, "loss": 0.0635, "lr": 1.8926487715155802e-06, "epoch": 1.1699881219480006, "percentage": 23.4, "elapsed_time": "0:12:39", "remaining_time": "0:41:26", "throughput": 5738.51, "total_tokens": 4358336} +{"current_steps": 8870, "total_steps": 37885, "loss": 0.0941, "lr": 1.892441015781107e-06, "epoch": 1.170648013725749, "percentage": 23.41, "elapsed_time": "0:12:39", "remaining_time": "0:41:25", "throughput": 5739.4, "total_tokens": 4360896} +{"current_steps": 8875, "total_steps": 37885, "loss": 0.0008, "lr": 1.892233070632491e-06, "epoch": 1.1713079055034974, "percentage": 23.43, "elapsed_time": "0:12:40", "remaining_time": "0:41:24", "throughput": 5740.28, "total_tokens": 4363456} +{"current_steps": 8880, "total_steps": 37885, "loss": 0.1365, "lr": 1.8920249361138665e-06, "epoch": 1.171967797281246, "percentage": 23.44, "elapsed_time": "0:12:40", "remaining_time": "0:41:23", "throughput": 5740.84, "total_tokens": 4365760} +{"current_steps": 8885, "total_steps": 37885, "loss": 0.0024, "lr": 1.891816612269409e-06, "epoch": 1.1726276890589944, "percentage": 23.45, "elapsed_time": "0:12:40", "remaining_time": "0:41:23", "throughput": 5741.55, "total_tokens": 4368192} +{"current_steps": 8890, "total_steps": 37885, "loss": 0.0928, "lr": 1.8916080991433337e-06, "epoch": 1.173287580836743, "percentage": 23.47, "elapsed_time": "0:12:41", "remaining_time": "0:41:22", "throughput": 5742.43, "total_tokens": 4370752} +{"current_steps": 8895, "total_steps": 37885, "loss": 0.0007, "lr": 1.8913993967798956e-06, "epoch": 1.1739474726144912, "percentage": 23.48, "elapsed_time": "0:12:41", "remaining_time": "0:41:21", "throughput": 5743.39, "total_tokens": 4373376} +{"current_steps": 8900, "total_steps": 37885, "loss": 0.0492, "lr": 1.8911905052233905e-06, "epoch": 1.1746073643922397, "percentage": 23.49, "elapsed_time": "0:12:41", "remaining_time": "0:41:20", "throughput": 5744.33, "total_tokens": 4376000} +{"current_steps": 8905, "total_steps": 37885, "loss": 0.1225, "lr": 1.8909814245181543e-06, "epoch": 1.1752672561699882, "percentage": 23.51, "elapsed_time": "0:12:42", "remaining_time": "0:41:20", "throughput": 5745.14, "total_tokens": 4378496} +{"current_steps": 8910, "total_steps": 37885, "loss": 0.0383, "lr": 1.890772154708563e-06, "epoch": 1.1759271479477365, "percentage": 23.52, "elapsed_time": "0:12:42", "remaining_time": "0:41:19", "throughput": 5745.55, "total_tokens": 4380672} +{"current_steps": 8915, "total_steps": 37885, "loss": 0.1929, "lr": 1.8905626958390317e-06, "epoch": 1.176587039725485, "percentage": 23.53, "elapsed_time": "0:12:42", "remaining_time": "0:41:18", "throughput": 5746.35, "total_tokens": 4383168} +{"current_steps": 8920, "total_steps": 37885, "loss": 0.0006, "lr": 1.8903530479540176e-06, "epoch": 1.1772469315032335, "percentage": 23.54, "elapsed_time": "0:12:43", "remaining_time": "0:41:17", "throughput": 5746.92, "total_tokens": 4385472} +{"current_steps": 8925, "total_steps": 37885, "loss": 0.0004, "lr": 1.8901432110980164e-06, "epoch": 1.177906823280982, "percentage": 23.56, "elapsed_time": "0:12:43", "remaining_time": "0:41:17", "throughput": 5747.57, "total_tokens": 4387840} +{"current_steps": 8930, "total_steps": 37885, "loss": 0.1419, "lr": 1.8899331853155648e-06, "epoch": 1.1785667150587305, "percentage": 23.57, "elapsed_time": "0:12:43", "remaining_time": "0:41:16", "throughput": 5748.34, "total_tokens": 4390336} +{"current_steps": 8935, "total_steps": 37885, "loss": 0.099, "lr": 1.8897229706512387e-06, "epoch": 1.1792266068364787, "percentage": 23.58, "elapsed_time": "0:12:44", "remaining_time": "0:41:15", "throughput": 5748.91, "total_tokens": 4392640} +{"current_steps": 8940, "total_steps": 37885, "loss": 0.1339, "lr": 1.889512567149655e-06, "epoch": 1.1798864986142272, "percentage": 23.6, "elapsed_time": "0:12:44", "remaining_time": "0:41:14", "throughput": 5749.68, "total_tokens": 4395136} +{"current_steps": 8945, "total_steps": 37885, "loss": 0.0047, "lr": 1.88930197485547e-06, "epoch": 1.1805463903919757, "percentage": 23.61, "elapsed_time": "0:12:44", "remaining_time": "0:41:14", "throughput": 5750.31, "total_tokens": 4397504} +{"current_steps": 8950, "total_steps": 37885, "loss": 0.0047, "lr": 1.8890911938133814e-06, "epoch": 1.1812062821697242, "percentage": 23.62, "elapsed_time": "0:12:45", "remaining_time": "0:41:13", "throughput": 5750.96, "total_tokens": 4399872} +{"current_steps": 8955, "total_steps": 37885, "loss": 0.0867, "lr": 1.8888802240681248e-06, "epoch": 1.1818661739474727, "percentage": 23.64, "elapsed_time": "0:12:45", "remaining_time": "0:41:12", "throughput": 5751.37, "total_tokens": 4402048} +{"current_steps": 8960, "total_steps": 37885, "loss": 0.003, "lr": 1.888669065664477e-06, "epoch": 1.182526065725221, "percentage": 23.65, "elapsed_time": "0:12:45", "remaining_time": "0:41:11", "throughput": 5752.01, "total_tokens": 4404416} +{"current_steps": 8965, "total_steps": 37885, "loss": 0.0207, "lr": 1.8884577186472557e-06, "epoch": 1.1831859575029695, "percentage": 23.66, "elapsed_time": "0:12:46", "remaining_time": "0:41:11", "throughput": 5752.56, "total_tokens": 4406720} +{"current_steps": 8970, "total_steps": 37885, "loss": 0.1395, "lr": 1.8882461830613173e-06, "epoch": 1.183845849280718, "percentage": 23.68, "elapsed_time": "0:12:46", "remaining_time": "0:41:10", "throughput": 5752.95, "total_tokens": 4408896} +{"current_steps": 8975, "total_steps": 37885, "loss": 0.0004, "lr": 1.8880344589515587e-06, "epoch": 1.1845057410584663, "percentage": 23.69, "elapsed_time": "0:12:46", "remaining_time": "0:41:09", "throughput": 5753.72, "total_tokens": 4411392} +{"current_steps": 8980, "total_steps": 37885, "loss": 0.0003, "lr": 1.887822546362917e-06, "epoch": 1.1851656328362148, "percentage": 23.7, "elapsed_time": "0:12:47", "remaining_time": "0:41:08", "throughput": 5754.51, "total_tokens": 4413888} +{"current_steps": 8985, "total_steps": 37885, "loss": 0.0006, "lr": 1.8876104453403686e-06, "epoch": 1.1858255246139633, "percentage": 23.72, "elapsed_time": "0:12:47", "remaining_time": "0:41:08", "throughput": 5755.3, "total_tokens": 4416384} +{"current_steps": 8990, "total_steps": 37885, "loss": 0.21, "lr": 1.8873981559289308e-06, "epoch": 1.1864854163917118, "percentage": 23.73, "elapsed_time": "0:12:47", "remaining_time": "0:41:07", "throughput": 5756.4, "total_tokens": 4419136} +{"current_steps": 8995, "total_steps": 37885, "loss": 0.0786, "lr": 1.8871856781736604e-06, "epoch": 1.1871453081694603, "percentage": 23.74, "elapsed_time": "0:12:48", "remaining_time": "0:41:06", "throughput": 5757.18, "total_tokens": 4421632} +{"current_steps": 9000, "total_steps": 37885, "loss": 0.0006, "lr": 1.8869730121196542e-06, "epoch": 1.1878051999472086, "percentage": 23.76, "elapsed_time": "0:12:48", "remaining_time": "0:41:05", "throughput": 5758.19, "total_tokens": 4424320} +{"current_steps": 9005, "total_steps": 37885, "loss": 0.1348, "lr": 1.8867601578120495e-06, "epoch": 1.188465091724957, "percentage": 23.77, "elapsed_time": "0:12:48", "remaining_time": "0:41:05", "throughput": 5759.03, "total_tokens": 4426880} +{"current_steps": 9010, "total_steps": 37885, "loss": 0.0006, "lr": 1.8865471152960225e-06, "epoch": 1.1891249835027056, "percentage": 23.78, "elapsed_time": "0:12:49", "remaining_time": "0:41:04", "throughput": 5759.79, "total_tokens": 4429376} +{"current_steps": 9015, "total_steps": 37885, "loss": 0.1167, "lr": 1.8863338846167905e-06, "epoch": 1.189784875280454, "percentage": 23.8, "elapsed_time": "0:12:49", "remaining_time": "0:41:03", "throughput": 5760.79, "total_tokens": 4432064} +{"current_steps": 9020, "total_steps": 37885, "loss": 0.0008, "lr": 1.8861204658196095e-06, "epoch": 1.1904447670582026, "percentage": 23.81, "elapsed_time": "0:12:49", "remaining_time": "0:41:03", "throughput": 5761.27, "total_tokens": 4434304} +{"current_steps": 9025, "total_steps": 37885, "loss": 0.0008, "lr": 1.8859068589497765e-06, "epoch": 1.1911046588359508, "percentage": 23.82, "elapsed_time": "0:12:50", "remaining_time": "0:41:02", "throughput": 5761.73, "total_tokens": 4436544} +{"current_steps": 9030, "total_steps": 37885, "loss": 0.0947, "lr": 1.8856930640526277e-06, "epoch": 1.1917645506136993, "percentage": 23.84, "elapsed_time": "0:12:50", "remaining_time": "0:41:01", "throughput": 5762.66, "total_tokens": 4439168} +{"current_steps": 9035, "total_steps": 37885, "loss": 0.0814, "lr": 1.88547908117354e-06, "epoch": 1.1924244423914478, "percentage": 23.85, "elapsed_time": "0:12:50", "remaining_time": "0:41:00", "throughput": 5763.22, "total_tokens": 4441472} +{"current_steps": 9040, "total_steps": 37885, "loss": 0.2709, "lr": 1.8852649103579292e-06, "epoch": 1.1930843341691963, "percentage": 23.86, "elapsed_time": "0:12:50", "remaining_time": "0:41:00", "throughput": 5763.85, "total_tokens": 4443840} +{"current_steps": 9045, "total_steps": 37885, "loss": 0.0662, "lr": 1.885050551651252e-06, "epoch": 1.1937442259469446, "percentage": 23.87, "elapsed_time": "0:12:51", "remaining_time": "0:40:59", "throughput": 5764.22, "total_tokens": 4446016} +{"current_steps": 9050, "total_steps": 37885, "loss": 0.2496, "lr": 1.8848360050990042e-06, "epoch": 1.1944041177246931, "percentage": 23.89, "elapsed_time": "0:12:51", "remaining_time": "0:40:58", "throughput": 5764.78, "total_tokens": 4448320} +{"current_steps": 9055, "total_steps": 37885, "loss": 0.0971, "lr": 1.8846212707467216e-06, "epoch": 1.1950640095024416, "percentage": 23.9, "elapsed_time": "0:12:51", "remaining_time": "0:40:57", "throughput": 5765.66, "total_tokens": 4450880} +{"current_steps": 9060, "total_steps": 37885, "loss": 0.002, "lr": 1.8844063486399805e-06, "epoch": 1.1957239012801901, "percentage": 23.91, "elapsed_time": "0:12:52", "remaining_time": "0:40:57", "throughput": 5766.95, "total_tokens": 4453824} +{"current_steps": 9065, "total_steps": 37885, "loss": 0.1786, "lr": 1.884191238824396e-06, "epoch": 1.1963837930579384, "percentage": 23.93, "elapsed_time": "0:12:52", "remaining_time": "0:40:56", "throughput": 5767.87, "total_tokens": 4456448} +{"current_steps": 9070, "total_steps": 37885, "loss": 0.1482, "lr": 1.883975941345624e-06, "epoch": 1.197043684835687, "percentage": 23.94, "elapsed_time": "0:12:52", "remaining_time": "0:40:55", "throughput": 5768.59, "total_tokens": 4458880} +{"current_steps": 9075, "total_steps": 37885, "loss": 0.1391, "lr": 1.8837604562493597e-06, "epoch": 1.1977035766134354, "percentage": 23.95, "elapsed_time": "0:12:53", "remaining_time": "0:40:54", "throughput": 5769.54, "total_tokens": 4461504} +{"current_steps": 9080, "total_steps": 37885, "loss": 0.1583, "lr": 1.883544783581338e-06, "epoch": 1.198363468391184, "percentage": 23.97, "elapsed_time": "0:12:53", "remaining_time": "0:40:54", "throughput": 5770.32, "total_tokens": 4464000} +{"current_steps": 9085, "total_steps": 37885, "loss": 0.0736, "lr": 1.8833289233873346e-06, "epoch": 1.1990233601689324, "percentage": 23.98, "elapsed_time": "0:12:53", "remaining_time": "0:40:53", "throughput": 5770.93, "total_tokens": 4466368} +{"current_steps": 9090, "total_steps": 37885, "loss": 0.1445, "lr": 1.8831128757131634e-06, "epoch": 1.1996832519466807, "percentage": 23.99, "elapsed_time": "0:12:54", "remaining_time": "0:40:52", "throughput": 5771.63, "total_tokens": 4468800} +{"current_steps": 9095, "total_steps": 37885, "loss": 0.1592, "lr": 1.8828966406046796e-06, "epoch": 1.2003431437244292, "percentage": 24.01, "elapsed_time": "0:12:54", "remaining_time": "0:40:51", "throughput": 5772.37, "total_tokens": 4471296} +{"current_steps": 9100, "total_steps": 37885, "loss": 0.0024, "lr": 1.8826802181077771e-06, "epoch": 1.2010030355021777, "percentage": 24.02, "elapsed_time": "0:12:54", "remaining_time": "0:40:51", "throughput": 5773.21, "total_tokens": 4473856} +{"current_steps": 9105, "total_steps": 37885, "loss": 0.0028, "lr": 1.8824636082683903e-06, "epoch": 1.2016629272799262, "percentage": 24.03, "elapsed_time": "0:12:55", "remaining_time": "0:40:50", "throughput": 5774.05, "total_tokens": 4476416} +{"current_steps": 9110, "total_steps": 37885, "loss": 0.044, "lr": 1.8822468111324927e-06, "epoch": 1.2023228190576745, "percentage": 24.05, "elapsed_time": "0:12:55", "remaining_time": "0:40:49", "throughput": 5774.73, "total_tokens": 4478848} +{"current_steps": 9115, "total_steps": 37885, "loss": 0.1416, "lr": 1.8820298267460983e-06, "epoch": 1.202982710835423, "percentage": 24.06, "elapsed_time": "0:12:55", "remaining_time": "0:40:49", "throughput": 5775.2, "total_tokens": 4481088} +{"current_steps": 9120, "total_steps": 37885, "loss": 0.0922, "lr": 1.8818126551552605e-06, "epoch": 1.2036426026131715, "percentage": 24.07, "elapsed_time": "0:12:56", "remaining_time": "0:40:48", "throughput": 5775.76, "total_tokens": 4483392} +{"current_steps": 9125, "total_steps": 37885, "loss": 0.1309, "lr": 1.881595296406072e-06, "epoch": 1.20430249439092, "percentage": 24.09, "elapsed_time": "0:12:56", "remaining_time": "0:40:47", "throughput": 5776.37, "total_tokens": 4485760} +{"current_steps": 9130, "total_steps": 37885, "loss": 0.0023, "lr": 1.881377750544666e-06, "epoch": 1.2049623861686682, "percentage": 24.1, "elapsed_time": "0:12:56", "remaining_time": "0:40:46", "throughput": 5776.91, "total_tokens": 4488064} +{"current_steps": 9135, "total_steps": 37885, "loss": 0.0897, "lr": 1.8811600176172147e-06, "epoch": 1.2056222779464167, "percentage": 24.11, "elapsed_time": "0:12:57", "remaining_time": "0:40:46", "throughput": 5777.05, "total_tokens": 4490048} +{"current_steps": 9140, "total_steps": 37885, "loss": 0.0015, "lr": 1.8809420976699308e-06, "epoch": 1.2062821697241652, "percentage": 24.13, "elapsed_time": "0:12:57", "remaining_time": "0:40:45", "throughput": 5777.74, "total_tokens": 4492480} +{"current_steps": 9145, "total_steps": 37885, "loss": 0.0265, "lr": 1.8807239907490656e-06, "epoch": 1.2069420615019137, "percentage": 24.14, "elapsed_time": "0:12:57", "remaining_time": "0:40:44", "throughput": 5778.66, "total_tokens": 4495104} +{"current_steps": 9150, "total_steps": 37885, "loss": 0.2775, "lr": 1.8805056969009114e-06, "epoch": 1.2076019532796622, "percentage": 24.15, "elapsed_time": "0:12:58", "remaining_time": "0:40:43", "throughput": 5779.2, "total_tokens": 4497408} +{"current_steps": 9155, "total_steps": 37885, "loss": 0.0605, "lr": 1.8802872161717988e-06, "epoch": 1.2082618450574105, "percentage": 24.17, "elapsed_time": "0:12:58", "remaining_time": "0:40:43", "throughput": 5779.59, "total_tokens": 4499584} +{"current_steps": 9160, "total_steps": 37885, "loss": 0.0287, "lr": 1.8800685486080994e-06, "epoch": 1.208921736835159, "percentage": 24.18, "elapsed_time": "0:12:58", "remaining_time": "0:40:42", "throughput": 5780.67, "total_tokens": 4502336} +{"current_steps": 9165, "total_steps": 37885, "loss": 0.0638, "lr": 1.8798496942562235e-06, "epoch": 1.2095816286129075, "percentage": 24.19, "elapsed_time": "0:12:59", "remaining_time": "0:40:41", "throughput": 5781.64, "total_tokens": 4505024} +{"current_steps": 9170, "total_steps": 37885, "loss": 0.0649, "lr": 1.879630653162621e-06, "epoch": 1.210241520390656, "percentage": 24.2, "elapsed_time": "0:12:59", "remaining_time": "0:40:41", "throughput": 5782.7, "total_tokens": 4507776} +{"current_steps": 9175, "total_steps": 37885, "loss": 0.0745, "lr": 1.8794114253737825e-06, "epoch": 1.2109014121684043, "percentage": 24.22, "elapsed_time": "0:12:59", "remaining_time": "0:40:40", "throughput": 5783.32, "total_tokens": 4510144} +{"current_steps": 9180, "total_steps": 37885, "loss": 0.0387, "lr": 1.8791920109362373e-06, "epoch": 1.2115613039461528, "percentage": 24.23, "elapsed_time": "0:13:00", "remaining_time": "0:40:39", "throughput": 5784.15, "total_tokens": 4512704} +{"current_steps": 9185, "total_steps": 37885, "loss": 0.0823, "lr": 1.878972409896554e-06, "epoch": 1.2122211957239013, "percentage": 24.24, "elapsed_time": "0:13:00", "remaining_time": "0:40:38", "throughput": 5784.97, "total_tokens": 4515264} +{"current_steps": 9190, "total_steps": 37885, "loss": 0.0804, "lr": 1.878752622301342e-06, "epoch": 1.2128810875016498, "percentage": 24.26, "elapsed_time": "0:13:00", "remaining_time": "0:40:38", "throughput": 5786.03, "total_tokens": 4518016} +{"current_steps": 9195, "total_steps": 37885, "loss": 0.0554, "lr": 1.8785326481972491e-06, "epoch": 1.213540979279398, "percentage": 24.27, "elapsed_time": "0:13:01", "remaining_time": "0:40:37", "throughput": 5786.57, "total_tokens": 4520320} +{"current_steps": 9200, "total_steps": 37885, "loss": 0.0832, "lr": 1.8783124876309637e-06, "epoch": 1.2142008710571466, "percentage": 24.28, "elapsed_time": "0:13:01", "remaining_time": "0:40:36", "throughput": 5786.85, "total_tokens": 4522432} +{"current_steps": 9205, "total_steps": 37885, "loss": 0.0009, "lr": 1.878092140649213e-06, "epoch": 1.214860762834895, "percentage": 24.3, "elapsed_time": "0:13:01", "remaining_time": "0:40:35", "throughput": 5787.52, "total_tokens": 4524864} +{"current_steps": 9210, "total_steps": 37885, "loss": 0.0041, "lr": 1.8778716072987638e-06, "epoch": 1.2155206546126436, "percentage": 24.31, "elapsed_time": "0:13:02", "remaining_time": "0:40:35", "throughput": 5788.22, "total_tokens": 4527296} +{"current_steps": 9215, "total_steps": 37885, "loss": 0.1339, "lr": 1.8776508876264235e-06, "epoch": 1.216180546390392, "percentage": 24.32, "elapsed_time": "0:13:02", "remaining_time": "0:40:34", "throughput": 5788.99, "total_tokens": 4529792} +{"current_steps": 9220, "total_steps": 37885, "loss": 0.0356, "lr": 1.8774299816790373e-06, "epoch": 1.2168404381681404, "percentage": 24.34, "elapsed_time": "0:13:02", "remaining_time": "0:40:33", "throughput": 5789.74, "total_tokens": 4532288} +{"current_steps": 9225, "total_steps": 37885, "loss": 0.0769, "lr": 1.8772088895034916e-06, "epoch": 1.2175003299458889, "percentage": 24.35, "elapsed_time": "0:13:03", "remaining_time": "0:40:33", "throughput": 5790.58, "total_tokens": 4534848} +{"current_steps": 9230, "total_steps": 37885, "loss": 0.0675, "lr": 1.876987611146711e-06, "epoch": 1.2181602217236374, "percentage": 24.36, "elapsed_time": "0:13:03", "remaining_time": "0:40:32", "throughput": 5791.26, "total_tokens": 4537280} +{"current_steps": 9235, "total_steps": 37885, "loss": 0.158, "lr": 1.876766146655661e-06, "epoch": 1.2188201135013859, "percentage": 24.38, "elapsed_time": "0:13:03", "remaining_time": "0:40:31", "throughput": 5792.02, "total_tokens": 4539776} +{"current_steps": 9240, "total_steps": 37885, "loss": 0.0008, "lr": 1.8765444960773453e-06, "epoch": 1.2194800052791341, "percentage": 24.39, "elapsed_time": "0:13:04", "remaining_time": "0:40:30", "throughput": 5792.63, "total_tokens": 4542144} +{"current_steps": 9245, "total_steps": 37885, "loss": 0.0557, "lr": 1.8763226594588078e-06, "epoch": 1.2201398970568826, "percentage": 24.4, "elapsed_time": "0:13:04", "remaining_time": "0:40:30", "throughput": 5793.31, "total_tokens": 4544576} +{"current_steps": 9250, "total_steps": 37885, "loss": 0.0475, "lr": 1.8761006368471315e-06, "epoch": 1.2207997888346311, "percentage": 24.42, "elapsed_time": "0:13:04", "remaining_time": "0:40:29", "throughput": 5794.27, "total_tokens": 4547264} +{"current_steps": 9255, "total_steps": 37885, "loss": 0.0003, "lr": 1.8758784282894394e-06, "epoch": 1.2214596806123796, "percentage": 24.43, "elapsed_time": "0:13:05", "remaining_time": "0:40:28", "throughput": 5794.94, "total_tokens": 4549696} +{"current_steps": 9260, "total_steps": 37885, "loss": 0.0956, "lr": 1.8756560338328934e-06, "epoch": 1.222119572390128, "percentage": 24.44, "elapsed_time": "0:13:05", "remaining_time": "0:40:28", "throughput": 5795.46, "total_tokens": 4552000} +{"current_steps": 9265, "total_steps": 37885, "loss": 0.0492, "lr": 1.8754334535246952e-06, "epoch": 1.2227794641678764, "percentage": 24.46, "elapsed_time": "0:13:05", "remaining_time": "0:40:27", "throughput": 5796.36, "total_tokens": 4554624} +{"current_steps": 9270, "total_steps": 37885, "loss": 0.0658, "lr": 1.875210687412086e-06, "epoch": 1.223439355945625, "percentage": 24.47, "elapsed_time": "0:13:06", "remaining_time": "0:40:26", "throughput": 5797.04, "total_tokens": 4557056} +{"current_steps": 9275, "total_steps": 37885, "loss": 0.0784, "lr": 1.874987735542346e-06, "epoch": 1.2240992477233734, "percentage": 24.48, "elapsed_time": "0:13:06", "remaining_time": "0:40:25", "throughput": 5797.71, "total_tokens": 4559488} +{"current_steps": 9280, "total_steps": 37885, "loss": 0.1279, "lr": 1.8747645979627955e-06, "epoch": 1.224759139501122, "percentage": 24.5, "elapsed_time": "0:13:06", "remaining_time": "0:40:25", "throughput": 5798.45, "total_tokens": 4561984} +{"current_steps": 9285, "total_steps": 37885, "loss": 0.0544, "lr": 1.8745412747207933e-06, "epoch": 1.2254190312788702, "percentage": 24.51, "elapsed_time": "0:13:07", "remaining_time": "0:40:24", "throughput": 5799.49, "total_tokens": 4564736} +{"current_steps": 9290, "total_steps": 37885, "loss": 0.0699, "lr": 1.8743177658637387e-06, "epoch": 1.2260789230566187, "percentage": 24.52, "elapsed_time": "0:13:07", "remaining_time": "0:40:23", "throughput": 5800.25, "total_tokens": 4567232} +{"current_steps": 9295, "total_steps": 37885, "loss": 0.1415, "lr": 1.8740940714390697e-06, "epoch": 1.2267388148343672, "percentage": 24.53, "elapsed_time": "0:13:07", "remaining_time": "0:40:22", "throughput": 5800.92, "total_tokens": 4569664} +{"current_steps": 9300, "total_steps": 37885, "loss": 0.0011, "lr": 1.8738701914942636e-06, "epoch": 1.2273987066121157, "percentage": 24.55, "elapsed_time": "0:13:08", "remaining_time": "0:40:22", "throughput": 5801.59, "total_tokens": 4572096} +{"current_steps": 9305, "total_steps": 37885, "loss": 0.0006, "lr": 1.8736461260768375e-06, "epoch": 1.228058598389864, "percentage": 24.56, "elapsed_time": "0:13:08", "remaining_time": "0:40:21", "throughput": 5802.26, "total_tokens": 4574528} +{"current_steps": 9310, "total_steps": 37885, "loss": 0.094, "lr": 1.8734218752343475e-06, "epoch": 1.2287184901676125, "percentage": 24.57, "elapsed_time": "0:13:08", "remaining_time": "0:40:20", "throughput": 5803.07, "total_tokens": 4577088} +{"current_steps": 9315, "total_steps": 37885, "loss": 0.0003, "lr": 1.8731974390143894e-06, "epoch": 1.229378381945361, "percentage": 24.59, "elapsed_time": "0:13:09", "remaining_time": "0:40:20", "throughput": 5803.68, "total_tokens": 4579456} +{"current_steps": 9320, "total_steps": 37885, "loss": 0.3725, "lr": 1.872972817464598e-06, "epoch": 1.2300382737231095, "percentage": 24.6, "elapsed_time": "0:13:09", "remaining_time": "0:40:19", "throughput": 5804.29, "total_tokens": 4581824} +{"current_steps": 9325, "total_steps": 37885, "loss": 0.0324, "lr": 1.8727480106326476e-06, "epoch": 1.2306981655008578, "percentage": 24.61, "elapsed_time": "0:13:09", "remaining_time": "0:40:18", "throughput": 5804.96, "total_tokens": 4584256} +{"current_steps": 9330, "total_steps": 37885, "loss": 0.1018, "lr": 1.872523018566252e-06, "epoch": 1.2313580572786063, "percentage": 24.63, "elapsed_time": "0:13:10", "remaining_time": "0:40:17", "throughput": 5806.0, "total_tokens": 4587008} +{"current_steps": 9335, "total_steps": 37885, "loss": 0.1404, "lr": 1.8722978413131641e-06, "epoch": 1.2320179490563548, "percentage": 24.64, "elapsed_time": "0:13:10", "remaining_time": "0:40:17", "throughput": 5807.1, "total_tokens": 4589824} +{"current_steps": 9340, "total_steps": 37885, "loss": 0.1816, "lr": 1.8720724789211758e-06, "epoch": 1.2326778408341033, "percentage": 24.65, "elapsed_time": "0:13:10", "remaining_time": "0:40:16", "throughput": 5807.98, "total_tokens": 4592448} +{"current_steps": 9345, "total_steps": 37885, "loss": 0.0031, "lr": 1.871846931438119e-06, "epoch": 1.2333377326118518, "percentage": 24.67, "elapsed_time": "0:13:11", "remaining_time": "0:40:15", "throughput": 5808.63, "total_tokens": 4594880} +{"current_steps": 9350, "total_steps": 37885, "loss": 0.4028, "lr": 1.8716211989118645e-06, "epoch": 1.2339976243896, "percentage": 24.68, "elapsed_time": "0:13:11", "remaining_time": "0:40:15", "throughput": 5809.08, "total_tokens": 4597120} +{"current_steps": 9355, "total_steps": 37885, "loss": 0.1788, "lr": 1.8713952813903222e-06, "epoch": 1.2346575161673485, "percentage": 24.69, "elapsed_time": "0:13:11", "remaining_time": "0:40:14", "throughput": 5809.75, "total_tokens": 4599552} +{"current_steps": 9360, "total_steps": 37885, "loss": 0.0456, "lr": 1.8711691789214416e-06, "epoch": 1.235317407945097, "percentage": 24.71, "elapsed_time": "0:13:12", "remaining_time": "0:40:13", "throughput": 5810.49, "total_tokens": 4602048} +{"current_steps": 9365, "total_steps": 37885, "loss": 0.0675, "lr": 1.8709428915532114e-06, "epoch": 1.2359772997228455, "percentage": 24.72, "elapsed_time": "0:13:12", "remaining_time": "0:40:13", "throughput": 5810.99, "total_tokens": 4604352} +{"current_steps": 9370, "total_steps": 37885, "loss": 0.1284, "lr": 1.8707164193336595e-06, "epoch": 1.2366371915005938, "percentage": 24.73, "elapsed_time": "0:13:12", "remaining_time": "0:40:12", "throughput": 5811.6, "total_tokens": 4606720} +{"current_steps": 9375, "total_steps": 37885, "loss": 0.1365, "lr": 1.8704897623108527e-06, "epoch": 1.2372970832783423, "percentage": 24.75, "elapsed_time": "0:13:13", "remaining_time": "0:40:11", "throughput": 5812.2, "total_tokens": 4609088} +{"current_steps": 9380, "total_steps": 37885, "loss": 0.0896, "lr": 1.8702629205328973e-06, "epoch": 1.2379569750560908, "percentage": 24.76, "elapsed_time": "0:13:13", "remaining_time": "0:40:10", "throughput": 5813.08, "total_tokens": 4611712} +{"current_steps": 9385, "total_steps": 37885, "loss": 0.1082, "lr": 1.8700358940479387e-06, "epoch": 1.2386168668338393, "percentage": 24.77, "elapsed_time": "0:13:13", "remaining_time": "0:40:10", "throughput": 5813.63, "total_tokens": 4614080} +{"current_steps": 9390, "total_steps": 37885, "loss": 0.0484, "lr": 1.8698086829041624e-06, "epoch": 1.2392767586115876, "percentage": 24.79, "elapsed_time": "0:13:13", "remaining_time": "0:40:09", "throughput": 5814.0, "total_tokens": 4616256} +{"current_steps": 9395, "total_steps": 37885, "loss": 0.0028, "lr": 1.8695812871497915e-06, "epoch": 1.239936650389336, "percentage": 24.8, "elapsed_time": "0:13:14", "remaining_time": "0:40:08", "throughput": 5814.64, "total_tokens": 4618688} +{"current_steps": 9400, "total_steps": 37885, "loss": 0.1368, "lr": 1.8693537068330898e-06, "epoch": 1.2405965421670846, "percentage": 24.81, "elapsed_time": "0:13:14", "remaining_time": "0:40:08", "throughput": 5815.36, "total_tokens": 4621184} +{"current_steps": 9405, "total_steps": 37885, "loss": 0.189, "lr": 1.8691259420023589e-06, "epoch": 1.241256433944833, "percentage": 24.83, "elapsed_time": "0:13:14", "remaining_time": "0:40:07", "throughput": 5816.03, "total_tokens": 4623616} +{"current_steps": 9410, "total_steps": 37885, "loss": 0.0022, "lr": 1.8688979927059405e-06, "epoch": 1.2419163257225816, "percentage": 24.84, "elapsed_time": "0:13:15", "remaining_time": "0:40:06", "throughput": 5816.69, "total_tokens": 4626048} +{"current_steps": 9415, "total_steps": 37885, "loss": 0.2181, "lr": 1.8686698589922154e-06, "epoch": 1.2425762175003299, "percentage": 24.85, "elapsed_time": "0:13:15", "remaining_time": "0:40:05", "throughput": 5817.42, "total_tokens": 4628544} +{"current_steps": 9420, "total_steps": 37885, "loss": 0.0009, "lr": 1.868441540909603e-06, "epoch": 1.2432361092780784, "percentage": 24.86, "elapsed_time": "0:13:15", "remaining_time": "0:40:05", "throughput": 5818.43, "total_tokens": 4631296} +{"current_steps": 9425, "total_steps": 37885, "loss": 0.0018, "lr": 1.8682130385065622e-06, "epoch": 1.2438960010558269, "percentage": 24.88, "elapsed_time": "0:13:16", "remaining_time": "0:40:04", "throughput": 5819.03, "total_tokens": 4633664} +{"current_steps": 9430, "total_steps": 37885, "loss": 0.001, "lr": 1.8679843518315911e-06, "epoch": 1.2445558928335754, "percentage": 24.89, "elapsed_time": "0:13:16", "remaining_time": "0:40:03", "throughput": 5819.83, "total_tokens": 4636224} +{"current_steps": 9435, "total_steps": 37885, "loss": 0.0792, "lr": 1.8677554809332272e-06, "epoch": 1.2452157846113237, "percentage": 24.9, "elapsed_time": "0:13:16", "remaining_time": "0:40:03", "throughput": 5820.57, "total_tokens": 4638720} +{"current_steps": 9440, "total_steps": 37885, "loss": 0.0399, "lr": 1.8675264258600459e-06, "epoch": 1.2458756763890722, "percentage": 24.92, "elapsed_time": "0:13:17", "remaining_time": "0:40:02", "throughput": 5821.41, "total_tokens": 4641280} +{"current_steps": 9445, "total_steps": 37885, "loss": 0.0117, "lr": 1.8672971866606627e-06, "epoch": 1.2465355681668207, "percentage": 24.93, "elapsed_time": "0:13:17", "remaining_time": "0:40:01", "throughput": 5822.01, "total_tokens": 4643648} +{"current_steps": 9450, "total_steps": 37885, "loss": 0.0879, "lr": 1.8670677633837321e-06, "epoch": 1.2471954599445692, "percentage": 24.94, "elapsed_time": "0:13:17", "remaining_time": "0:40:00", "throughput": 5822.59, "total_tokens": 4646016} +{"current_steps": 9455, "total_steps": 37885, "loss": 0.2114, "lr": 1.8668381560779478e-06, "epoch": 1.2478553517223174, "percentage": 24.96, "elapsed_time": "0:13:18", "remaining_time": "0:40:00", "throughput": 5823.09, "total_tokens": 4648320} +{"current_steps": 9460, "total_steps": 37885, "loss": 0.0495, "lr": 1.866608364792042e-06, "epoch": 1.248515243500066, "percentage": 24.97, "elapsed_time": "0:13:18", "remaining_time": "0:39:59", "throughput": 5823.97, "total_tokens": 4650944} +{"current_steps": 9465, "total_steps": 37885, "loss": 0.0025, "lr": 1.8663783895747863e-06, "epoch": 1.2491751352778144, "percentage": 24.98, "elapsed_time": "0:13:18", "remaining_time": "0:39:58", "throughput": 5824.71, "total_tokens": 4653440} +{"current_steps": 9470, "total_steps": 37885, "loss": 0.1382, "lr": 1.8661482304749911e-06, "epoch": 1.249835027055563, "percentage": 25.0, "elapsed_time": "0:13:19", "remaining_time": "0:39:58", "throughput": 5825.58, "total_tokens": 4656064} +{"current_steps": 9475, "total_steps": 37885, "loss": 0.1179, "lr": 1.8659178875415062e-06, "epoch": 1.2504949188333114, "percentage": 25.01, "elapsed_time": "0:13:19", "remaining_time": "0:39:57", "throughput": 5825.93, "total_tokens": 4658240} +{"current_steps": 9475, "total_steps": 37885, "eval_loss": 0.11660958081483841, "epoch": 1.2504949188333114, "percentage": 25.01, "elapsed_time": "0:13:27", "remaining_time": "0:40:20", "throughput": 5769.5, "total_tokens": 4658240} +{"current_steps": 9480, "total_steps": 37885, "loss": 0.001, "lr": 1.86568736082322e-06, "epoch": 1.2511548106110597, "percentage": 25.02, "elapsed_time": "0:14:02", "remaining_time": "0:42:04", "throughput": 5532.95, "total_tokens": 4660992} +{"current_steps": 9485, "total_steps": 37885, "loss": 0.0584, "lr": 1.8654566503690606e-06, "epoch": 1.2518147023888082, "percentage": 25.04, "elapsed_time": "0:14:02", "remaining_time": "0:42:03", "throughput": 5533.75, "total_tokens": 4663488} +{"current_steps": 9490, "total_steps": 37885, "loss": 0.0698, "lr": 1.8652257562279942e-06, "epoch": 1.2524745941665567, "percentage": 25.05, "elapsed_time": "0:14:03", "remaining_time": "0:42:02", "throughput": 5534.7, "total_tokens": 4666112} +{"current_steps": 9495, "total_steps": 37885, "loss": 0.1508, "lr": 1.864994678449026e-06, "epoch": 1.2531344859443052, "percentage": 25.06, "elapsed_time": "0:14:03", "remaining_time": "0:42:01", "throughput": 5535.9, "total_tokens": 4668992} +{"current_steps": 9500, "total_steps": 37885, "loss": 0.1196, "lr": 1.864763417081202e-06, "epoch": 1.2537943777220537, "percentage": 25.08, "elapsed_time": "0:14:03", "remaining_time": "0:42:00", "throughput": 5536.32, "total_tokens": 4671168} +{"current_steps": 9505, "total_steps": 37885, "loss": 0.0589, "lr": 1.864531972173604e-06, "epoch": 1.254454269499802, "percentage": 25.09, "elapsed_time": "0:14:04", "remaining_time": "0:42:00", "throughput": 5537.04, "total_tokens": 4673600} +{"current_steps": 9510, "total_steps": 37885, "loss": 0.0006, "lr": 1.8643003437753557e-06, "epoch": 1.2551141612775505, "percentage": 25.1, "elapsed_time": "0:14:04", "remaining_time": "0:41:59", "throughput": 5537.96, "total_tokens": 4676224} +{"current_steps": 9515, "total_steps": 37885, "loss": 0.1122, "lr": 1.8640685319356181e-06, "epoch": 1.255774053055299, "percentage": 25.12, "elapsed_time": "0:14:04", "remaining_time": "0:41:58", "throughput": 5538.62, "total_tokens": 4678592} +{"current_steps": 9520, "total_steps": 37885, "loss": 0.001, "lr": 1.8638365367035922e-06, "epoch": 1.2564339448330473, "percentage": 25.13, "elapsed_time": "0:14:05", "remaining_time": "0:41:57", "throughput": 5539.26, "total_tokens": 4680960} +{"current_steps": 9525, "total_steps": 37885, "loss": 0.0433, "lr": 1.863604358128516e-06, "epoch": 1.2570938366107958, "percentage": 25.14, "elapsed_time": "0:14:05", "remaining_time": "0:41:57", "throughput": 5539.83, "total_tokens": 4683264} +{"current_steps": 9530, "total_steps": 37885, "loss": 0.0433, "lr": 1.8633719962596693e-06, "epoch": 1.2577537283885443, "percentage": 25.16, "elapsed_time": "0:14:05", "remaining_time": "0:41:56", "throughput": 5540.61, "total_tokens": 4685760} +{"current_steps": 9535, "total_steps": 37885, "loss": 0.0812, "lr": 1.863139451146368e-06, "epoch": 1.2584136201662928, "percentage": 25.17, "elapsed_time": "0:14:06", "remaining_time": "0:41:55", "throughput": 5541.37, "total_tokens": 4688256} +{"current_steps": 9540, "total_steps": 37885, "loss": 0.0911, "lr": 1.8629067228379687e-06, "epoch": 1.2590735119440413, "percentage": 25.18, "elapsed_time": "0:14:06", "remaining_time": "0:41:54", "throughput": 5541.99, "total_tokens": 4690624} +{"current_steps": 9545, "total_steps": 37885, "loss": 0.0416, "lr": 1.8626738113838657e-06, "epoch": 1.2597334037217895, "percentage": 25.19, "elapsed_time": "0:14:06", "remaining_time": "0:41:53", "throughput": 5543.04, "total_tokens": 4693376} +{"current_steps": 9550, "total_steps": 37885, "loss": 0.0004, "lr": 1.8624407168334938e-06, "epoch": 1.260393295499538, "percentage": 25.21, "elapsed_time": "0:14:07", "remaining_time": "0:41:53", "throughput": 5543.88, "total_tokens": 4695936} +{"current_steps": 9555, "total_steps": 37885, "loss": 0.0927, "lr": 1.8622074392363249e-06, "epoch": 1.2610531872772865, "percentage": 25.22, "elapsed_time": "0:14:07", "remaining_time": "0:41:52", "throughput": 5544.52, "total_tokens": 4698304} +{"current_steps": 9560, "total_steps": 37885, "loss": 0.0017, "lr": 1.8619739786418707e-06, "epoch": 1.261713079055035, "percentage": 25.23, "elapsed_time": "0:14:07", "remaining_time": "0:41:51", "throughput": 5545.1, "total_tokens": 4700608} +{"current_steps": 9565, "total_steps": 37885, "loss": 0.0491, "lr": 1.8617403350996814e-06, "epoch": 1.2623729708327835, "percentage": 25.25, "elapsed_time": "0:14:08", "remaining_time": "0:41:50", "throughput": 5545.73, "total_tokens": 4702976} +{"current_steps": 9570, "total_steps": 37885, "loss": 0.1877, "lr": 1.861506508659346e-06, "epoch": 1.2630328626105318, "percentage": 25.26, "elapsed_time": "0:14:08", "remaining_time": "0:41:50", "throughput": 5546.44, "total_tokens": 4705408} +{"current_steps": 9575, "total_steps": 37885, "loss": 0.1504, "lr": 1.861272499370493e-06, "epoch": 1.2636927543882803, "percentage": 25.27, "elapsed_time": "0:14:08", "remaining_time": "0:41:49", "throughput": 5547.35, "total_tokens": 4708032} +{"current_steps": 9580, "total_steps": 37885, "loss": 0.073, "lr": 1.8610383072827887e-06, "epoch": 1.2643526461660288, "percentage": 25.29, "elapsed_time": "0:14:09", "remaining_time": "0:41:48", "throughput": 5547.96, "total_tokens": 4710400} +{"current_steps": 9585, "total_steps": 37885, "loss": 0.0615, "lr": 1.8608039324459388e-06, "epoch": 1.265012537943777, "percentage": 25.3, "elapsed_time": "0:14:09", "remaining_time": "0:41:47", "throughput": 5548.79, "total_tokens": 4712960} +{"current_steps": 9590, "total_steps": 37885, "loss": 0.0543, "lr": 1.8605693749096877e-06, "epoch": 1.2656724297215256, "percentage": 25.31, "elapsed_time": "0:14:09", "remaining_time": "0:41:46", "throughput": 5549.28, "total_tokens": 4715200} +{"current_steps": 9595, "total_steps": 37885, "loss": 0.1053, "lr": 1.8603346347238185e-06, "epoch": 1.266332321499274, "percentage": 25.33, "elapsed_time": "0:14:10", "remaining_time": "0:41:46", "throughput": 5549.93, "total_tokens": 4717568} +{"current_steps": 9600, "total_steps": 37885, "loss": 0.1185, "lr": 1.8600997119381533e-06, "epoch": 1.2669922132770226, "percentage": 25.34, "elapsed_time": "0:14:10", "remaining_time": "0:41:45", "throughput": 5550.55, "total_tokens": 4719936} +{"current_steps": 9605, "total_steps": 37885, "loss": 0.092, "lr": 1.8598646066025523e-06, "epoch": 1.267652105054771, "percentage": 25.35, "elapsed_time": "0:14:10", "remaining_time": "0:41:44", "throughput": 5551.25, "total_tokens": 4722368} +{"current_steps": 9610, "total_steps": 37885, "loss": 0.0026, "lr": 1.8596293187669155e-06, "epoch": 1.2683119968325194, "percentage": 25.37, "elapsed_time": "0:14:11", "remaining_time": "0:41:43", "throughput": 5552.03, "total_tokens": 4724864} +{"current_steps": 9615, "total_steps": 37885, "loss": 0.0039, "lr": 1.8593938484811806e-06, "epoch": 1.2689718886102679, "percentage": 25.38, "elapsed_time": "0:14:11", "remaining_time": "0:41:43", "throughput": 5552.86, "total_tokens": 4727424} +{"current_steps": 9620, "total_steps": 37885, "loss": 0.0911, "lr": 1.8591581957953245e-06, "epoch": 1.2696317803880164, "percentage": 25.39, "elapsed_time": "0:14:11", "remaining_time": "0:41:42", "throughput": 5553.28, "total_tokens": 4729600} +{"current_steps": 9625, "total_steps": 37885, "loss": 0.0008, "lr": 1.8589223607593628e-06, "epoch": 1.2702916721657649, "percentage": 25.41, "elapsed_time": "0:14:12", "remaining_time": "0:41:41", "throughput": 5554.32, "total_tokens": 4732352} +{"current_steps": 9630, "total_steps": 37885, "loss": 0.0029, "lr": 1.8586863434233502e-06, "epoch": 1.2709515639435134, "percentage": 25.42, "elapsed_time": "0:14:12", "remaining_time": "0:41:40", "throughput": 5555.08, "total_tokens": 4734848} +{"current_steps": 9635, "total_steps": 37885, "loss": 0.0696, "lr": 1.8584501438373793e-06, "epoch": 1.2716114557212617, "percentage": 25.43, "elapsed_time": "0:14:12", "remaining_time": "0:41:40", "throughput": 5555.69, "total_tokens": 4737216} +{"current_steps": 9640, "total_steps": 37885, "loss": 0.0958, "lr": 1.8582137620515816e-06, "epoch": 1.2722713474990102, "percentage": 25.45, "elapsed_time": "0:14:13", "remaining_time": "0:41:39", "throughput": 5556.48, "total_tokens": 4739712} +{"current_steps": 9645, "total_steps": 37885, "loss": 0.2084, "lr": 1.8579771981161277e-06, "epoch": 1.2729312392767587, "percentage": 25.46, "elapsed_time": "0:14:13", "remaining_time": "0:41:38", "throughput": 5557.19, "total_tokens": 4742144} +{"current_steps": 9650, "total_steps": 37885, "loss": 0.0001, "lr": 1.8577404520812262e-06, "epoch": 1.273591131054507, "percentage": 25.47, "elapsed_time": "0:14:13", "remaining_time": "0:41:37", "throughput": 5558.17, "total_tokens": 4744832} +{"current_steps": 9655, "total_steps": 37885, "loss": 0.0457, "lr": 1.8575035239971255e-06, "epoch": 1.2742510228322554, "percentage": 25.49, "elapsed_time": "0:14:14", "remaining_time": "0:41:37", "throughput": 5559.2, "total_tokens": 4747584} +{"current_steps": 9660, "total_steps": 37885, "loss": 0.0005, "lr": 1.857266413914111e-06, "epoch": 1.274910914610004, "percentage": 25.5, "elapsed_time": "0:14:14", "remaining_time": "0:41:36", "throughput": 5559.89, "total_tokens": 4750016} +{"current_steps": 9665, "total_steps": 37885, "loss": 0.0567, "lr": 1.8570291218825082e-06, "epoch": 1.2755708063877524, "percentage": 25.51, "elapsed_time": "0:14:14", "remaining_time": "0:41:35", "throughput": 5560.75, "total_tokens": 4752576} +{"current_steps": 9670, "total_steps": 37885, "loss": 0.0774, "lr": 1.8567916479526802e-06, "epoch": 1.276230698165501, "percentage": 25.52, "elapsed_time": "0:14:14", "remaining_time": "0:41:34", "throughput": 5561.18, "total_tokens": 4754752} +{"current_steps": 9675, "total_steps": 37885, "loss": 0.0006, "lr": 1.8565539921750295e-06, "epoch": 1.2768905899432492, "percentage": 25.54, "elapsed_time": "0:14:15", "remaining_time": "0:41:33", "throughput": 5561.54, "total_tokens": 4756864} +{"current_steps": 9680, "total_steps": 37885, "loss": 0.0764, "lr": 1.8563161545999965e-06, "epoch": 1.2775504817209977, "percentage": 25.55, "elapsed_time": "0:14:15", "remaining_time": "0:41:33", "throughput": 5562.4, "total_tokens": 4759424} +{"current_steps": 9685, "total_steps": 37885, "loss": 0.2287, "lr": 1.8560781352780607e-06, "epoch": 1.2782103734987462, "percentage": 25.56, "elapsed_time": "0:14:15", "remaining_time": "0:41:32", "throughput": 5563.04, "total_tokens": 4761792} +{"current_steps": 9690, "total_steps": 37885, "loss": 0.0725, "lr": 1.8558399342597402e-06, "epoch": 1.2788702652764947, "percentage": 25.58, "elapsed_time": "0:14:16", "remaining_time": "0:41:31", "throughput": 5564.09, "total_tokens": 4764544} +{"current_steps": 9695, "total_steps": 37885, "loss": 0.0003, "lr": 1.8556015515955907e-06, "epoch": 1.2795301570542432, "percentage": 25.59, "elapsed_time": "0:14:16", "remaining_time": "0:41:30", "throughput": 5564.74, "total_tokens": 4766912} +{"current_steps": 9700, "total_steps": 37885, "loss": 0.063, "lr": 1.8553629873362079e-06, "epoch": 1.2801900488319915, "percentage": 25.6, "elapsed_time": "0:14:16", "remaining_time": "0:41:30", "throughput": 5565.39, "total_tokens": 4769280} +{"current_steps": 9705, "total_steps": 37885, "loss": 0.0511, "lr": 1.855124241532225e-06, "epoch": 1.28084994060974, "percentage": 25.62, "elapsed_time": "0:14:17", "remaining_time": "0:41:29", "throughput": 5566.44, "total_tokens": 4772032} +{"current_steps": 9710, "total_steps": 37885, "loss": 0.0003, "lr": 1.8548853142343142e-06, "epoch": 1.2815098323874885, "percentage": 25.63, "elapsed_time": "0:14:17", "remaining_time": "0:41:28", "throughput": 5567.07, "total_tokens": 4774400} +{"current_steps": 9715, "total_steps": 37885, "loss": 0.2591, "lr": 1.854646205493186e-06, "epoch": 1.2821697241652368, "percentage": 25.64, "elapsed_time": "0:14:17", "remaining_time": "0:41:27", "throughput": 5567.56, "total_tokens": 4776640} +{"current_steps": 9720, "total_steps": 37885, "loss": 0.0848, "lr": 1.8544069153595896e-06, "epoch": 1.2828296159429853, "percentage": 25.66, "elapsed_time": "0:14:18", "remaining_time": "0:41:26", "throughput": 5568.2, "total_tokens": 4779008} +{"current_steps": 9725, "total_steps": 37885, "loss": 0.2256, "lr": 1.8541674438843125e-06, "epoch": 1.2834895077207338, "percentage": 25.67, "elapsed_time": "0:14:18", "remaining_time": "0:41:26", "throughput": 5569.17, "total_tokens": 4781696} +{"current_steps": 9730, "total_steps": 37885, "loss": 0.0343, "lr": 1.8539277911181809e-06, "epoch": 1.2841493994984823, "percentage": 25.68, "elapsed_time": "0:14:18", "remaining_time": "0:41:25", "throughput": 5569.97, "total_tokens": 4784192} +{"current_steps": 9735, "total_steps": 37885, "loss": 0.0027, "lr": 1.8536879571120593e-06, "epoch": 1.2848092912762308, "percentage": 25.7, "elapsed_time": "0:14:19", "remaining_time": "0:41:24", "throughput": 5570.95, "total_tokens": 4786880} +{"current_steps": 9740, "total_steps": 37885, "loss": 0.1961, "lr": 1.8534479419168508e-06, "epoch": 1.285469183053979, "percentage": 25.71, "elapsed_time": "0:14:19", "remaining_time": "0:41:23", "throughput": 5572.05, "total_tokens": 4789696} +{"current_steps": 9745, "total_steps": 37885, "loss": 0.0241, "lr": 1.8532077455834964e-06, "epoch": 1.2861290748317276, "percentage": 25.72, "elapsed_time": "0:14:19", "remaining_time": "0:41:23", "throughput": 5573.02, "total_tokens": 4792384} +{"current_steps": 9750, "total_steps": 37885, "loss": 0.1954, "lr": 1.8529673681629766e-06, "epoch": 1.286788966609476, "percentage": 25.74, "elapsed_time": "0:14:20", "remaining_time": "0:41:22", "throughput": 5573.83, "total_tokens": 4794944} +{"current_steps": 9755, "total_steps": 37885, "loss": 0.0025, "lr": 1.85272680970631e-06, "epoch": 1.2874488583872246, "percentage": 25.75, "elapsed_time": "0:14:20", "remaining_time": "0:41:21", "throughput": 5574.52, "total_tokens": 4797376} +{"current_steps": 9760, "total_steps": 37885, "loss": 0.0051, "lr": 1.8524860702645527e-06, "epoch": 1.288108750164973, "percentage": 25.76, "elapsed_time": "0:14:20", "remaining_time": "0:41:20", "throughput": 5575.21, "total_tokens": 4799808} +{"current_steps": 9765, "total_steps": 37885, "loss": 0.0732, "lr": 1.8522451498888004e-06, "epoch": 1.2887686419427213, "percentage": 25.78, "elapsed_time": "0:14:21", "remaining_time": "0:41:20", "throughput": 5576.24, "total_tokens": 4802560} +{"current_steps": 9770, "total_steps": 37885, "loss": 0.0006, "lr": 1.8520040486301862e-06, "epoch": 1.2894285337204698, "percentage": 25.79, "elapsed_time": "0:14:21", "remaining_time": "0:41:19", "throughput": 5576.61, "total_tokens": 4804736} +{"current_steps": 9775, "total_steps": 37885, "loss": 0.1509, "lr": 1.8517627665398825e-06, "epoch": 1.2900884254982183, "percentage": 25.8, "elapsed_time": "0:14:21", "remaining_time": "0:41:18", "throughput": 5577.14, "total_tokens": 4807040} +{"current_steps": 9780, "total_steps": 37885, "loss": 0.0015, "lr": 1.8515213036690996e-06, "epoch": 1.2907483172759666, "percentage": 25.81, "elapsed_time": "0:14:22", "remaining_time": "0:41:17", "throughput": 5577.47, "total_tokens": 4809152} +{"current_steps": 9785, "total_steps": 37885, "loss": 0.0002, "lr": 1.8512796600690864e-06, "epoch": 1.2914082090537151, "percentage": 25.83, "elapsed_time": "0:14:22", "remaining_time": "0:41:17", "throughput": 5578.39, "total_tokens": 4811776} +{"current_steps": 9790, "total_steps": 37885, "loss": 0.0003, "lr": 1.8510378357911297e-06, "epoch": 1.2920681008314636, "percentage": 25.84, "elapsed_time": "0:14:22", "remaining_time": "0:41:16", "throughput": 5579.15, "total_tokens": 4814272} +{"current_steps": 9795, "total_steps": 37885, "loss": 0.0535, "lr": 1.8507958308865551e-06, "epoch": 1.2927279926092121, "percentage": 25.85, "elapsed_time": "0:14:23", "remaining_time": "0:41:15", "throughput": 5579.67, "total_tokens": 4816576} +{"current_steps": 9800, "total_steps": 37885, "loss": 0.0654, "lr": 1.8505536454067264e-06, "epoch": 1.2933878843869606, "percentage": 25.87, "elapsed_time": "0:14:23", "remaining_time": "0:41:14", "throughput": 5580.54, "total_tokens": 4819200} +{"current_steps": 9805, "total_steps": 37885, "loss": 0.134, "lr": 1.8503112794030456e-06, "epoch": 1.294047776164709, "percentage": 25.88, "elapsed_time": "0:14:23", "remaining_time": "0:41:14", "throughput": 5581.44, "total_tokens": 4821824} +{"current_steps": 9810, "total_steps": 37885, "loss": 0.0005, "lr": 1.8500687329269532e-06, "epoch": 1.2947076679424574, "percentage": 25.89, "elapsed_time": "0:14:24", "remaining_time": "0:41:13", "throughput": 5582.46, "total_tokens": 4824576} +{"current_steps": 9815, "total_steps": 37885, "loss": 0.0988, "lr": 1.8498260060299282e-06, "epoch": 1.295367559720206, "percentage": 25.91, "elapsed_time": "0:14:24", "remaining_time": "0:41:12", "throughput": 5583.29, "total_tokens": 4827136} +{"current_steps": 9820, "total_steps": 37885, "loss": 0.0933, "lr": 1.849583098763487e-06, "epoch": 1.2960274514979544, "percentage": 25.92, "elapsed_time": "0:14:24", "remaining_time": "0:41:11", "throughput": 5583.68, "total_tokens": 4829312} +{"current_steps": 9825, "total_steps": 37885, "loss": 0.0507, "lr": 1.8493400111791858e-06, "epoch": 1.296687343275703, "percentage": 25.93, "elapsed_time": "0:14:25", "remaining_time": "0:41:11", "throughput": 5584.44, "total_tokens": 4831808} +{"current_steps": 9830, "total_steps": 37885, "loss": 0.1303, "lr": 1.8490967433286172e-06, "epoch": 1.2973472350534512, "percentage": 25.95, "elapsed_time": "0:14:25", "remaining_time": "0:41:10", "throughput": 5584.87, "total_tokens": 4834048} +{"current_steps": 9835, "total_steps": 37885, "loss": 0.0016, "lr": 1.8488532952634138e-06, "epoch": 1.2980071268311997, "percentage": 25.96, "elapsed_time": "0:14:25", "remaining_time": "0:41:09", "throughput": 5585.48, "total_tokens": 4836416} +{"current_steps": 9840, "total_steps": 37885, "loss": 0.0792, "lr": 1.8486096670352448e-06, "epoch": 1.2986670186089482, "percentage": 25.97, "elapsed_time": "0:14:26", "remaining_time": "0:41:08", "throughput": 5585.92, "total_tokens": 4838656} +{"current_steps": 9845, "total_steps": 37885, "loss": 0.1515, "lr": 1.8483658586958198e-06, "epoch": 1.2993269103866965, "percentage": 25.99, "elapsed_time": "0:14:26", "remaining_time": "0:41:08", "throughput": 5586.49, "total_tokens": 4841024} +{"current_steps": 9850, "total_steps": 37885, "loss": 0.0899, "lr": 1.8481218702968845e-06, "epoch": 1.299986802164445, "percentage": 26.0, "elapsed_time": "0:14:26", "remaining_time": "0:41:07", "throughput": 5587.2, "total_tokens": 4843520} +{"current_steps": 9855, "total_steps": 37885, "loss": 0.0714, "lr": 1.8478777018902236e-06, "epoch": 1.3006466939421935, "percentage": 26.01, "elapsed_time": "0:14:27", "remaining_time": "0:41:06", "throughput": 5588.13, "total_tokens": 4846208} +{"current_steps": 9860, "total_steps": 37885, "loss": 0.1439, "lr": 1.8476333535276605e-06, "epoch": 1.301306585719942, "percentage": 26.03, "elapsed_time": "0:14:27", "remaining_time": "0:41:05", "throughput": 5588.93, "total_tokens": 4848768} +{"current_steps": 9865, "total_steps": 37885, "loss": 0.0974, "lr": 1.8473888252610563e-06, "epoch": 1.3019664774976905, "percentage": 26.04, "elapsed_time": "0:14:27", "remaining_time": "0:41:05", "throughput": 5589.68, "total_tokens": 4851264} +{"current_steps": 9870, "total_steps": 37885, "loss": 0.1057, "lr": 1.8471441171423101e-06, "epoch": 1.3026263692754387, "percentage": 26.05, "elapsed_time": "0:14:28", "remaining_time": "0:41:04", "throughput": 5590.27, "total_tokens": 4853632} +{"current_steps": 9875, "total_steps": 37885, "loss": 0.0011, "lr": 1.8468992292233595e-06, "epoch": 1.3032862610531872, "percentage": 26.07, "elapsed_time": "0:14:28", "remaining_time": "0:41:03", "throughput": 5591.15, "total_tokens": 4856256} +{"current_steps": 9880, "total_steps": 37885, "loss": 0.076, "lr": 1.8466541615561804e-06, "epoch": 1.3039461528309357, "percentage": 26.08, "elapsed_time": "0:14:28", "remaining_time": "0:41:02", "throughput": 5591.87, "total_tokens": 4858752} +{"current_steps": 9885, "total_steps": 37885, "loss": 0.0014, "lr": 1.8464089141927866e-06, "epoch": 1.3046060446086842, "percentage": 26.09, "elapsed_time": "0:14:29", "remaining_time": "0:41:02", "throughput": 5592.58, "total_tokens": 4861248} +{"current_steps": 9890, "total_steps": 37885, "loss": 0.2671, "lr": 1.8461634871852298e-06, "epoch": 1.3052659363864327, "percentage": 26.11, "elapsed_time": "0:14:29", "remaining_time": "0:41:01", "throughput": 5593.28, "total_tokens": 4863744} +{"current_steps": 9895, "total_steps": 37885, "loss": 0.0681, "lr": 1.8459178805856003e-06, "epoch": 1.305925828164181, "percentage": 26.12, "elapsed_time": "0:14:29", "remaining_time": "0:41:00", "throughput": 5593.74, "total_tokens": 4865984} +{"current_steps": 9900, "total_steps": 37885, "loss": 0.1544, "lr": 1.8456720944460265e-06, "epoch": 1.3065857199419295, "percentage": 26.13, "elapsed_time": "0:14:30", "remaining_time": "0:40:59", "throughput": 5594.43, "total_tokens": 4868480} +{"current_steps": 9905, "total_steps": 37885, "loss": 0.1641, "lr": 1.8454261288186741e-06, "epoch": 1.307245611719678, "percentage": 26.14, "elapsed_time": "0:14:30", "remaining_time": "0:40:59", "throughput": 5595.17, "total_tokens": 4870976} +{"current_steps": 9910, "total_steps": 37885, "loss": 0.0584, "lr": 1.8451799837557483e-06, "epoch": 1.3079055034974263, "percentage": 26.16, "elapsed_time": "0:14:30", "remaining_time": "0:40:58", "throughput": 5595.9, "total_tokens": 4873472} +{"current_steps": 9915, "total_steps": 37885, "loss": 0.0582, "lr": 1.8449336593094914e-06, "epoch": 1.3085653952751748, "percentage": 26.17, "elapsed_time": "0:14:31", "remaining_time": "0:40:57", "throughput": 5596.8, "total_tokens": 4876160} +{"current_steps": 9920, "total_steps": 37885, "loss": 0.0677, "lr": 1.8446871555321834e-06, "epoch": 1.3092252870529233, "percentage": 26.18, "elapsed_time": "0:14:31", "remaining_time": "0:40:57", "throughput": 5597.25, "total_tokens": 4878400} +{"current_steps": 9925, "total_steps": 37885, "loss": 0.0026, "lr": 1.8444404724761436e-06, "epoch": 1.3098851788306718, "percentage": 26.2, "elapsed_time": "0:14:31", "remaining_time": "0:40:56", "throughput": 5598.22, "total_tokens": 4881152} +{"current_steps": 9930, "total_steps": 37885, "loss": 0.0665, "lr": 1.8441936101937285e-06, "epoch": 1.3105450706084203, "percentage": 26.21, "elapsed_time": "0:14:32", "remaining_time": "0:40:55", "throughput": 5598.96, "total_tokens": 4883648} +{"current_steps": 9935, "total_steps": 37885, "loss": 0.1721, "lr": 1.8439465687373328e-06, "epoch": 1.3112049623861686, "percentage": 26.22, "elapsed_time": "0:14:32", "remaining_time": "0:40:54", "throughput": 5599.29, "total_tokens": 4885760} +{"current_steps": 9940, "total_steps": 37885, "loss": 0.0015, "lr": 1.8436993481593891e-06, "epoch": 1.311864854163917, "percentage": 26.24, "elapsed_time": "0:14:32", "remaining_time": "0:40:54", "throughput": 5600.15, "total_tokens": 4888384} +{"current_steps": 9945, "total_steps": 37885, "loss": 0.084, "lr": 1.8434519485123685e-06, "epoch": 1.3125247459416656, "percentage": 26.25, "elapsed_time": "0:14:33", "remaining_time": "0:40:53", "throughput": 5600.88, "total_tokens": 4890880} +{"current_steps": 9950, "total_steps": 37885, "loss": 0.0374, "lr": 1.8432043698487796e-06, "epoch": 1.313184637719414, "percentage": 26.26, "elapsed_time": "0:14:33", "remaining_time": "0:40:52", "throughput": 5601.39, "total_tokens": 4893184} +{"current_steps": 9955, "total_steps": 37885, "loss": 0.0611, "lr": 1.8429566122211693e-06, "epoch": 1.3138445294971626, "percentage": 26.28, "elapsed_time": "0:14:33", "remaining_time": "0:40:51", "throughput": 5601.96, "total_tokens": 4895552} +{"current_steps": 9960, "total_steps": 37885, "loss": 0.1239, "lr": 1.8427086756821222e-06, "epoch": 1.3145044212749109, "percentage": 26.29, "elapsed_time": "0:14:34", "remaining_time": "0:40:51", "throughput": 5602.48, "total_tokens": 4897856} +{"current_steps": 9965, "total_steps": 37885, "loss": 0.1224, "lr": 1.842460560284261e-06, "epoch": 1.3151643130526594, "percentage": 26.3, "elapsed_time": "0:14:34", "remaining_time": "0:40:50", "throughput": 5603.21, "total_tokens": 4900352} +{"current_steps": 9970, "total_steps": 37885, "loss": 0.0006, "lr": 1.8422122660802466e-06, "epoch": 1.3158242048304079, "percentage": 26.32, "elapsed_time": "0:14:34", "remaining_time": "0:40:49", "throughput": 5604.12, "total_tokens": 4903040} +{"current_steps": 9975, "total_steps": 37885, "loss": 0.0633, "lr": 1.8419637931227776e-06, "epoch": 1.3164840966081561, "percentage": 26.33, "elapsed_time": "0:14:35", "remaining_time": "0:40:48", "throughput": 5604.97, "total_tokens": 4905664} +{"current_steps": 9980, "total_steps": 37885, "loss": 0.0512, "lr": 1.8417151414645904e-06, "epoch": 1.3171439883859046, "percentage": 26.34, "elapsed_time": "0:14:35", "remaining_time": "0:40:48", "throughput": 5605.31, "total_tokens": 4907840} +{"current_steps": 9985, "total_steps": 37885, "loss": 0.0012, "lr": 1.84146631115846e-06, "epoch": 1.3178038801636531, "percentage": 26.36, "elapsed_time": "0:14:35", "remaining_time": "0:40:47", "throughput": 5605.83, "total_tokens": 4910144} +{"current_steps": 9990, "total_steps": 37885, "loss": 0.1102, "lr": 1.8412173022571979e-06, "epoch": 1.3184637719414016, "percentage": 26.37, "elapsed_time": "0:14:36", "remaining_time": "0:40:46", "throughput": 5606.55, "total_tokens": 4912640} +{"current_steps": 9995, "total_steps": 37885, "loss": 0.0006, "lr": 1.8409681148136556e-06, "epoch": 1.3191236637191501, "percentage": 26.38, "elapsed_time": "0:14:36", "remaining_time": "0:40:45", "throughput": 5607.04, "total_tokens": 4914944} +{"current_steps": 10000, "total_steps": 37885, "loss": 0.0516, "lr": 1.8407187488807203e-06, "epoch": 1.3197835554968984, "percentage": 26.4, "elapsed_time": "0:14:36", "remaining_time": "0:40:45", "throughput": 5607.88, "total_tokens": 4917568} +{"current_steps": 10005, "total_steps": 37885, "loss": 0.0525, "lr": 1.8404692045113185e-06, "epoch": 1.320443447274647, "percentage": 26.41, "elapsed_time": "0:14:37", "remaining_time": "0:40:44", "throughput": 5608.17, "total_tokens": 4919680} +{"current_steps": 10010, "total_steps": 37885, "loss": 0.0183, "lr": 1.8402194817584147e-06, "epoch": 1.3211033390523954, "percentage": 26.42, "elapsed_time": "0:14:37", "remaining_time": "0:40:43", "throughput": 5608.53, "total_tokens": 4921856} +{"current_steps": 10015, "total_steps": 37885, "loss": 0.0421, "lr": 1.8399695806750098e-06, "epoch": 1.321763230830144, "percentage": 26.44, "elapsed_time": "0:14:37", "remaining_time": "0:40:43", "throughput": 5609.19, "total_tokens": 4924288} +{"current_steps": 10020, "total_steps": 37885, "loss": 0.1288, "lr": 1.8397195013141445e-06, "epoch": 1.3224231226078924, "percentage": 26.45, "elapsed_time": "0:14:38", "remaining_time": "0:40:42", "throughput": 5609.63, "total_tokens": 4926528} +{"current_steps": 10025, "total_steps": 37885, "loss": 0.004, "lr": 1.8394692437288954e-06, "epoch": 1.3230830143856407, "percentage": 26.46, "elapsed_time": "0:14:38", "remaining_time": "0:40:41", "throughput": 5610.68, "total_tokens": 4929344} +{"current_steps": 10030, "total_steps": 37885, "loss": 0.0934, "lr": 1.8392188079723784e-06, "epoch": 1.3237429061633892, "percentage": 26.47, "elapsed_time": "0:14:38", "remaining_time": "0:40:40", "throughput": 5611.3, "total_tokens": 4931776} +{"current_steps": 10035, "total_steps": 37885, "loss": 0.0003, "lr": 1.8389681940977467e-06, "epoch": 1.3244027979411377, "percentage": 26.49, "elapsed_time": "0:14:39", "remaining_time": "0:40:40", "throughput": 5612.03, "total_tokens": 4934272} +{"current_steps": 10040, "total_steps": 37885, "loss": 0.4409, "lr": 1.838717402158191e-06, "epoch": 1.325062689718886, "percentage": 26.5, "elapsed_time": "0:14:39", "remaining_time": "0:40:39", "throughput": 5612.94, "total_tokens": 4936960} +{"current_steps": 10045, "total_steps": 37885, "loss": 0.2134, "lr": 1.83846643220694e-06, "epoch": 1.3257225814966347, "percentage": 26.51, "elapsed_time": "0:14:39", "remaining_time": "0:40:38", "throughput": 5613.96, "total_tokens": 4939776} +{"current_steps": 10050, "total_steps": 37885, "loss": 0.0947, "lr": 1.8382152842972607e-06, "epoch": 1.326382473274383, "percentage": 26.53, "elapsed_time": "0:14:40", "remaining_time": "0:40:37", "throughput": 5614.58, "total_tokens": 4942208} +{"current_steps": 10055, "total_steps": 37885, "loss": 0.0015, "lr": 1.8379639584824572e-06, "epoch": 1.3270423650521315, "percentage": 26.54, "elapsed_time": "0:14:40", "remaining_time": "0:40:37", "throughput": 5614.98, "total_tokens": 4944448} +{"current_steps": 10060, "total_steps": 37885, "loss": 0.177, "lr": 1.8377124548158713e-06, "epoch": 1.32770225682988, "percentage": 26.55, "elapsed_time": "0:14:40", "remaining_time": "0:40:36", "throughput": 5615.52, "total_tokens": 4946816} +{"current_steps": 10065, "total_steps": 37885, "loss": 0.0229, "lr": 1.8374607733508833e-06, "epoch": 1.3283621486076282, "percentage": 26.57, "elapsed_time": "0:14:41", "remaining_time": "0:40:35", "throughput": 5616.07, "total_tokens": 4949184} +{"current_steps": 10070, "total_steps": 37885, "loss": 0.1654, "lr": 1.8372089141409108e-06, "epoch": 1.3290220403853767, "percentage": 26.58, "elapsed_time": "0:14:41", "remaining_time": "0:40:35", "throughput": 5616.72, "total_tokens": 4951616} +{"current_steps": 10075, "total_steps": 37885, "loss": 0.1656, "lr": 1.8369568772394087e-06, "epoch": 1.3296819321631252, "percentage": 26.59, "elapsed_time": "0:14:41", "remaining_time": "0:40:34", "throughput": 5617.35, "total_tokens": 4954048} +{"current_steps": 10080, "total_steps": 37885, "loss": 0.1187, "lr": 1.8367046626998702e-06, "epoch": 1.3303418239408737, "percentage": 26.61, "elapsed_time": "0:14:42", "remaining_time": "0:40:33", "throughput": 5617.59, "total_tokens": 4956160} +{"current_steps": 10085, "total_steps": 37885, "loss": 0.1228, "lr": 1.8364522705758257e-06, "epoch": 1.3310017157186222, "percentage": 26.62, "elapsed_time": "0:14:42", "remaining_time": "0:40:32", "throughput": 5618.12, "total_tokens": 4958528} +{"current_steps": 10090, "total_steps": 37885, "loss": 0.1143, "lr": 1.836199700920844e-06, "epoch": 1.3316616074963705, "percentage": 26.63, "elapsed_time": "0:14:42", "remaining_time": "0:40:32", "throughput": 5618.69, "total_tokens": 4960896} +{"current_steps": 10095, "total_steps": 37885, "loss": 0.0022, "lr": 1.8359469537885312e-06, "epoch": 1.332321499274119, "percentage": 26.65, "elapsed_time": "0:14:43", "remaining_time": "0:40:31", "throughput": 5619.47, "total_tokens": 4963456} +{"current_steps": 10100, "total_steps": 37885, "loss": 0.0887, "lr": 1.835694029232531e-06, "epoch": 1.3329813910518675, "percentage": 26.66, "elapsed_time": "0:14:43", "remaining_time": "0:40:30", "throughput": 5619.82, "total_tokens": 4965632} +{"current_steps": 10105, "total_steps": 37885, "loss": 0.1001, "lr": 1.8354409273065247e-06, "epoch": 1.333641282829616, "percentage": 26.67, "elapsed_time": "0:14:43", "remaining_time": "0:40:30", "throughput": 5620.31, "total_tokens": 4967936} +{"current_steps": 10110, "total_steps": 37885, "loss": 0.0025, "lr": 1.835187648064231e-06, "epoch": 1.3343011746073645, "percentage": 26.69, "elapsed_time": "0:14:44", "remaining_time": "0:40:29", "throughput": 5620.82, "total_tokens": 4970240} +{"current_steps": 10115, "total_steps": 37885, "loss": 0.001, "lr": 1.8349341915594073e-06, "epoch": 1.3349610663851128, "percentage": 26.7, "elapsed_time": "0:14:44", "remaining_time": "0:40:28", "throughput": 5621.77, "total_tokens": 4972992} +{"current_steps": 10120, "total_steps": 37885, "loss": 0.1337, "lr": 1.8346805578458474e-06, "epoch": 1.3356209581628613, "percentage": 26.71, "elapsed_time": "0:14:44", "remaining_time": "0:40:27", "throughput": 5622.63, "total_tokens": 4975616} +{"current_steps": 10125, "total_steps": 37885, "loss": 0.0462, "lr": 1.8344267469773835e-06, "epoch": 1.3362808499406098, "percentage": 26.73, "elapsed_time": "0:14:45", "remaining_time": "0:40:27", "throughput": 5623.36, "total_tokens": 4978112} +{"current_steps": 10130, "total_steps": 37885, "loss": 0.0005, "lr": 1.8341727590078847e-06, "epoch": 1.336940741718358, "percentage": 26.74, "elapsed_time": "0:14:45", "remaining_time": "0:40:26", "throughput": 5623.82, "total_tokens": 4980352} +{"current_steps": 10135, "total_steps": 37885, "loss": 0.0783, "lr": 1.8339185939912589e-06, "epoch": 1.3376006334961066, "percentage": 26.75, "elapsed_time": "0:14:45", "remaining_time": "0:40:25", "throughput": 5624.33, "total_tokens": 4982656} +{"current_steps": 10140, "total_steps": 37885, "loss": 0.0077, "lr": 1.83366425198145e-06, "epoch": 1.338260525273855, "percentage": 26.77, "elapsed_time": "0:14:46", "remaining_time": "0:40:24", "throughput": 5625.12, "total_tokens": 4985216} +{"current_steps": 10145, "total_steps": 37885, "loss": 0.0159, "lr": 1.8334097330324405e-06, "epoch": 1.3389204170516036, "percentage": 26.78, "elapsed_time": "0:14:46", "remaining_time": "0:40:24", "throughput": 5626.04, "total_tokens": 4987904} +{"current_steps": 10150, "total_steps": 37885, "loss": 0.0203, "lr": 1.8331550371982503e-06, "epoch": 1.339580308829352, "percentage": 26.79, "elapsed_time": "0:14:46", "remaining_time": "0:40:23", "throughput": 5626.77, "total_tokens": 4990400} +{"current_steps": 10155, "total_steps": 37885, "loss": 0.071, "lr": 1.8329001645329364e-06, "epoch": 1.3402402006071004, "percentage": 26.8, "elapsed_time": "0:14:47", "remaining_time": "0:40:22", "throughput": 5627.55, "total_tokens": 4992960} +{"current_steps": 10160, "total_steps": 37885, "loss": 0.0032, "lr": 1.8326451150905945e-06, "epoch": 1.3409000923848489, "percentage": 26.82, "elapsed_time": "0:14:47", "remaining_time": "0:40:22", "throughput": 5628.41, "total_tokens": 4995584} +{"current_steps": 10165, "total_steps": 37885, "loss": 0.1142, "lr": 1.8323898889253562e-06, "epoch": 1.3415599841625974, "percentage": 26.83, "elapsed_time": "0:14:47", "remaining_time": "0:40:21", "throughput": 5629.0, "total_tokens": 4997952} +{"current_steps": 10170, "total_steps": 37885, "loss": 0.1238, "lr": 1.8321344860913918e-06, "epoch": 1.3422198759403459, "percentage": 26.84, "elapsed_time": "0:14:48", "remaining_time": "0:40:20", "throughput": 5629.26, "total_tokens": 5000000} +{"current_steps": 10175, "total_steps": 37885, "loss": 0.0662, "lr": 1.8318789066429083e-06, "epoch": 1.3428797677180944, "percentage": 26.86, "elapsed_time": "0:14:48", "remaining_time": "0:40:19", "throughput": 5630.17, "total_tokens": 5002688} +{"current_steps": 10180, "total_steps": 37885, "loss": 0.0005, "lr": 1.831623150634151e-06, "epoch": 1.3435396594958426, "percentage": 26.87, "elapsed_time": "0:14:48", "remaining_time": "0:40:19", "throughput": 5630.91, "total_tokens": 5005184} +{"current_steps": 10185, "total_steps": 37885, "loss": 0.1373, "lr": 1.8313672181194023e-06, "epoch": 1.3441995512735911, "percentage": 26.88, "elapsed_time": "0:14:49", "remaining_time": "0:40:18", "throughput": 5631.37, "total_tokens": 5007424} +{"current_steps": 10190, "total_steps": 37885, "loss": 0.0557, "lr": 1.8311111091529817e-06, "epoch": 1.3448594430513396, "percentage": 26.9, "elapsed_time": "0:14:49", "remaining_time": "0:40:17", "throughput": 5632.29, "total_tokens": 5010112} +{"current_steps": 10195, "total_steps": 37885, "loss": 0.0594, "lr": 1.8308548237892465e-06, "epoch": 1.345519334829088, "percentage": 26.91, "elapsed_time": "0:14:49", "remaining_time": "0:40:16", "throughput": 5633.15, "total_tokens": 5012736} +{"current_steps": 10200, "total_steps": 37885, "loss": 0.0539, "lr": 1.8305983620825915e-06, "epoch": 1.3461792266068364, "percentage": 26.92, "elapsed_time": "0:14:50", "remaining_time": "0:40:16", "throughput": 5633.68, "total_tokens": 5015040} +{"current_steps": 10205, "total_steps": 37885, "loss": 0.0573, "lr": 1.8303417240874492e-06, "epoch": 1.346839118384585, "percentage": 26.94, "elapsed_time": "0:14:50", "remaining_time": "0:40:15", "throughput": 5634.2, "total_tokens": 5017344} +{"current_steps": 10210, "total_steps": 37885, "loss": 0.0528, "lr": 1.8300849098582886e-06, "epoch": 1.3474990101623334, "percentage": 26.95, "elapsed_time": "0:14:50", "remaining_time": "0:40:14", "throughput": 5634.84, "total_tokens": 5019776} +{"current_steps": 10215, "total_steps": 37885, "loss": 0.0395, "lr": 1.829827919449617e-06, "epoch": 1.348158901940082, "percentage": 26.96, "elapsed_time": "0:14:51", "remaining_time": "0:40:13", "throughput": 5635.56, "total_tokens": 5022272} +{"current_steps": 10220, "total_steps": 37885, "loss": 0.1797, "lr": 1.8295707529159783e-06, "epoch": 1.3488187937178302, "percentage": 26.98, "elapsed_time": "0:14:51", "remaining_time": "0:40:13", "throughput": 5636.27, "total_tokens": 5024768} +{"current_steps": 10225, "total_steps": 37885, "loss": 0.1089, "lr": 1.829313410311955e-06, "epoch": 1.3494786854955787, "percentage": 26.99, "elapsed_time": "0:14:51", "remaining_time": "0:40:12", "throughput": 5636.78, "total_tokens": 5027072} +{"current_steps": 10230, "total_steps": 37885, "loss": 0.1722, "lr": 1.8290558916921656e-06, "epoch": 1.3501385772733272, "percentage": 27.0, "elapsed_time": "0:14:52", "remaining_time": "0:40:11", "throughput": 5637.5, "total_tokens": 5029568} +{"current_steps": 10235, "total_steps": 37885, "loss": 0.0379, "lr": 1.8287981971112668e-06, "epoch": 1.3507984690510757, "percentage": 27.02, "elapsed_time": "0:14:52", "remaining_time": "0:40:11", "throughput": 5638.41, "total_tokens": 5032256} +{"current_steps": 10240, "total_steps": 37885, "loss": 0.0258, "lr": 1.8285403266239521e-06, "epoch": 1.3514583608288242, "percentage": 27.03, "elapsed_time": "0:14:52", "remaining_time": "0:40:10", "throughput": 5639.34, "total_tokens": 5034944} +{"current_steps": 10245, "total_steps": 37885, "loss": 0.2289, "lr": 1.8282822802849531e-06, "epoch": 1.3521182526065725, "percentage": 27.04, "elapsed_time": "0:14:53", "remaining_time": "0:40:09", "throughput": 5640.05, "total_tokens": 5037440} +{"current_steps": 10250, "total_steps": 37885, "loss": 0.0005, "lr": 1.8280240581490381e-06, "epoch": 1.352778144384321, "percentage": 27.06, "elapsed_time": "0:14:53", "remaining_time": "0:40:08", "throughput": 5640.97, "total_tokens": 5040128} +{"current_steps": 10255, "total_steps": 37885, "loss": 0.0257, "lr": 1.8277656602710127e-06, "epoch": 1.3534380361620695, "percentage": 27.07, "elapsed_time": "0:14:53", "remaining_time": "0:40:08", "throughput": 5641.69, "total_tokens": 5042624} +{"current_steps": 10260, "total_steps": 37885, "loss": 0.0817, "lr": 1.8275070867057203e-06, "epoch": 1.3540979279398178, "percentage": 27.08, "elapsed_time": "0:14:54", "remaining_time": "0:40:07", "throughput": 5642.19, "total_tokens": 5044928} +{"current_steps": 10265, "total_steps": 37885, "loss": 0.0005, "lr": 1.827248337508041e-06, "epoch": 1.3547578197175663, "percentage": 27.1, "elapsed_time": "0:14:54", "remaining_time": "0:40:06", "throughput": 5642.98, "total_tokens": 5047488} +{"current_steps": 10270, "total_steps": 37885, "loss": 0.0283, "lr": 1.8269894127328925e-06, "epoch": 1.3554177114953148, "percentage": 27.11, "elapsed_time": "0:14:54", "remaining_time": "0:40:06", "throughput": 5644.09, "total_tokens": 5050368} +{"current_steps": 10275, "total_steps": 37885, "loss": 0.0452, "lr": 1.8267303124352295e-06, "epoch": 1.3560776032730633, "percentage": 27.12, "elapsed_time": "0:14:55", "remaining_time": "0:40:05", "throughput": 5644.66, "total_tokens": 5052736} +{"current_steps": 10280, "total_steps": 37885, "loss": 0.1482, "lr": 1.826471036670045e-06, "epoch": 1.3567374950508118, "percentage": 27.13, "elapsed_time": "0:14:55", "remaining_time": "0:40:04", "throughput": 5645.32, "total_tokens": 5055168} +{"current_steps": 10285, "total_steps": 37885, "loss": 0.0281, "lr": 1.8262115854923673e-06, "epoch": 1.35739738682856, "percentage": 27.15, "elapsed_time": "0:14:55", "remaining_time": "0:40:03", "throughput": 5646.03, "total_tokens": 5057664} +{"current_steps": 10290, "total_steps": 37885, "loss": 0.1029, "lr": 1.8259519589572637e-06, "epoch": 1.3580572786063085, "percentage": 27.16, "elapsed_time": "0:14:56", "remaining_time": "0:40:03", "throughput": 5646.75, "total_tokens": 5060160} +{"current_steps": 10295, "total_steps": 37885, "loss": 0.0132, "lr": 1.8256921571198376e-06, "epoch": 1.358717170384057, "percentage": 27.17, "elapsed_time": "0:14:56", "remaining_time": "0:40:02", "throughput": 5647.72, "total_tokens": 5062912} +{"current_steps": 10300, "total_steps": 37885, "loss": 0.0083, "lr": 1.8254321800352308e-06, "epoch": 1.3593770621618055, "percentage": 27.19, "elapsed_time": "0:14:56", "remaining_time": "0:40:01", "throughput": 5648.24, "total_tokens": 5065216} +{"current_steps": 10305, "total_steps": 37885, "loss": 0.0474, "lr": 1.8251720277586209e-06, "epoch": 1.360036953939554, "percentage": 27.2, "elapsed_time": "0:14:57", "remaining_time": "0:40:00", "throughput": 5648.68, "total_tokens": 5067456} +{"current_steps": 10310, "total_steps": 37885, "loss": 0.2756, "lr": 1.8249117003452233e-06, "epoch": 1.3606968457173023, "percentage": 27.21, "elapsed_time": "0:14:57", "remaining_time": "0:40:00", "throughput": 5649.21, "total_tokens": 5069760} +{"current_steps": 10315, "total_steps": 37885, "loss": 0.1271, "lr": 1.8246511978502912e-06, "epoch": 1.3613567374950508, "percentage": 27.23, "elapsed_time": "0:14:57", "remaining_time": "0:39:59", "throughput": 5650.01, "total_tokens": 5072320} +{"current_steps": 10320, "total_steps": 37885, "loss": 0.0017, "lr": 1.8243905203291136e-06, "epoch": 1.3620166292727993, "percentage": 27.24, "elapsed_time": "0:14:58", "remaining_time": "0:39:58", "throughput": 5650.72, "total_tokens": 5074816} +{"current_steps": 10325, "total_steps": 37885, "loss": 0.0858, "lr": 1.8241296678370184e-06, "epoch": 1.3626765210505476, "percentage": 27.25, "elapsed_time": "0:14:58", "remaining_time": "0:39:58", "throughput": 5651.43, "total_tokens": 5077312} +{"current_steps": 10330, "total_steps": 37885, "loss": 0.0011, "lr": 1.8238686404293686e-06, "epoch": 1.363336412828296, "percentage": 27.27, "elapsed_time": "0:14:58", "remaining_time": "0:39:57", "throughput": 5651.94, "total_tokens": 5079616} +{"current_steps": 10335, "total_steps": 37885, "loss": 0.3048, "lr": 1.8236074381615661e-06, "epoch": 1.3639963046060446, "percentage": 27.28, "elapsed_time": "0:14:59", "remaining_time": "0:39:56", "throughput": 5652.18, "total_tokens": 5081664} +{"current_steps": 10340, "total_steps": 37885, "loss": 0.0004, "lr": 1.823346061089049e-06, "epoch": 1.364656196383793, "percentage": 27.29, "elapsed_time": "0:14:59", "remaining_time": "0:39:55", "throughput": 5652.94, "total_tokens": 5084224} +{"current_steps": 10345, "total_steps": 37885, "loss": 0.0951, "lr": 1.8230845092672925e-06, "epoch": 1.3653160881615416, "percentage": 27.31, "elapsed_time": "0:14:59", "remaining_time": "0:39:55", "throughput": 5653.45, "total_tokens": 5086528} +{"current_steps": 10350, "total_steps": 37885, "loss": 0.167, "lr": 1.8228227827518093e-06, "epoch": 1.3659759799392899, "percentage": 27.32, "elapsed_time": "0:15:00", "remaining_time": "0:39:54", "throughput": 5654.11, "total_tokens": 5088960} +{"current_steps": 10355, "total_steps": 37885, "loss": 0.0787, "lr": 1.8225608815981488e-06, "epoch": 1.3666358717170384, "percentage": 27.33, "elapsed_time": "0:15:00", "remaining_time": "0:39:53", "throughput": 5654.72, "total_tokens": 5091392} +{"current_steps": 10360, "total_steps": 37885, "loss": 0.0948, "lr": 1.8222988058618976e-06, "epoch": 1.3672957634947869, "percentage": 27.35, "elapsed_time": "0:15:00", "remaining_time": "0:39:53", "throughput": 5655.45, "total_tokens": 5093888} +{"current_steps": 10365, "total_steps": 37885, "loss": 0.15, "lr": 1.8220365555986797e-06, "epoch": 1.3679556552725354, "percentage": 27.36, "elapsed_time": "0:15:01", "remaining_time": "0:39:52", "throughput": 5656.03, "total_tokens": 5096256} +{"current_steps": 10370, "total_steps": 37885, "loss": 0.0489, "lr": 1.8217741308641553e-06, "epoch": 1.3686155470502839, "percentage": 27.37, "elapsed_time": "0:15:01", "remaining_time": "0:39:51", "throughput": 5656.81, "total_tokens": 5098816} +{"current_steps": 10375, "total_steps": 37885, "loss": 0.0487, "lr": 1.8215115317140226e-06, "epoch": 1.3692754388280322, "percentage": 27.39, "elapsed_time": "0:15:01", "remaining_time": "0:39:50", "throughput": 5657.47, "total_tokens": 5101248} +{"current_steps": 10380, "total_steps": 37885, "loss": 0.0838, "lr": 1.8212487582040164e-06, "epoch": 1.3699353306057807, "percentage": 27.4, "elapsed_time": "0:15:02", "remaining_time": "0:39:50", "throughput": 5657.91, "total_tokens": 5103488} +{"current_steps": 10385, "total_steps": 37885, "loss": 0.2107, "lr": 1.8209858103899081e-06, "epoch": 1.3705952223835292, "percentage": 27.41, "elapsed_time": "0:15:02", "remaining_time": "0:39:49", "throughput": 5658.54, "total_tokens": 5105920} +{"current_steps": 10390, "total_steps": 37885, "loss": 0.001, "lr": 1.8207226883275067e-06, "epoch": 1.3712551141612774, "percentage": 27.43, "elapsed_time": "0:15:02", "remaining_time": "0:39:48", "throughput": 5659.17, "total_tokens": 5108352} +{"current_steps": 10395, "total_steps": 37885, "loss": 0.1689, "lr": 1.820459392072658e-06, "epoch": 1.371915005939026, "percentage": 27.44, "elapsed_time": "0:15:02", "remaining_time": "0:39:48", "throughput": 5659.79, "total_tokens": 5110784} +{"current_steps": 10400, "total_steps": 37885, "loss": 0.121, "lr": 1.8201959216812443e-06, "epoch": 1.3725748977167744, "percentage": 27.45, "elapsed_time": "0:15:03", "remaining_time": "0:39:47", "throughput": 5660.57, "total_tokens": 5113344} +{"current_steps": 10405, "total_steps": 37885, "loss": 0.0541, "lr": 1.8199322772091858e-06, "epoch": 1.373234789494523, "percentage": 27.46, "elapsed_time": "0:15:03", "remaining_time": "0:39:46", "throughput": 5661.14, "total_tokens": 5115712} +{"current_steps": 10410, "total_steps": 37885, "loss": 0.0519, "lr": 1.819668458712439e-06, "epoch": 1.3738946812722714, "percentage": 27.48, "elapsed_time": "0:15:03", "remaining_time": "0:39:45", "throughput": 5661.58, "total_tokens": 5117952} +{"current_steps": 10415, "total_steps": 37885, "loss": 0.0012, "lr": 1.8194044662469973e-06, "epoch": 1.3745545730500197, "percentage": 27.49, "elapsed_time": "0:15:04", "remaining_time": "0:39:45", "throughput": 5661.96, "total_tokens": 5120128} +{"current_steps": 10420, "total_steps": 37885, "loss": 0.0045, "lr": 1.8191402998688913e-06, "epoch": 1.3752144648277682, "percentage": 27.5, "elapsed_time": "0:15:04", "remaining_time": "0:39:44", "throughput": 5662.46, "total_tokens": 5122432} +{"current_steps": 10425, "total_steps": 37885, "loss": 0.0804, "lr": 1.8188759596341888e-06, "epoch": 1.3758743566055167, "percentage": 27.52, "elapsed_time": "0:15:04", "remaining_time": "0:39:43", "throughput": 5663.29, "total_tokens": 5125056} +{"current_steps": 10430, "total_steps": 37885, "loss": 0.0818, "lr": 1.8186114455989933e-06, "epoch": 1.3765342483832652, "percentage": 27.53, "elapsed_time": "0:15:05", "remaining_time": "0:39:42", "throughput": 5663.86, "total_tokens": 5127424} +{"current_steps": 10435, "total_steps": 37885, "loss": 0.0692, "lr": 1.8183467578194467e-06, "epoch": 1.3771941401610137, "percentage": 27.54, "elapsed_time": "0:15:05", "remaining_time": "0:39:42", "throughput": 5664.42, "total_tokens": 5129792} +{"current_steps": 10440, "total_steps": 37885, "loss": 0.1073, "lr": 1.8180818963517264e-06, "epoch": 1.377854031938762, "percentage": 27.56, "elapsed_time": "0:15:05", "remaining_time": "0:39:41", "throughput": 5664.85, "total_tokens": 5132032} +{"current_steps": 10445, "total_steps": 37885, "loss": 0.0095, "lr": 1.8178168612520478e-06, "epoch": 1.3785139237165105, "percentage": 27.57, "elapsed_time": "0:15:06", "remaining_time": "0:39:40", "throughput": 5665.42, "total_tokens": 5134400} +{"current_steps": 10450, "total_steps": 37885, "loss": 0.0715, "lr": 1.8175516525766627e-06, "epoch": 1.379173815494259, "percentage": 27.58, "elapsed_time": "0:15:06", "remaining_time": "0:39:40", "throughput": 5665.85, "total_tokens": 5136640} +{"current_steps": 10455, "total_steps": 37885, "loss": 0.1421, "lr": 1.8172862703818593e-06, "epoch": 1.3798337072720073, "percentage": 27.6, "elapsed_time": "0:15:06", "remaining_time": "0:39:39", "throughput": 5666.53, "total_tokens": 5139136} +{"current_steps": 10460, "total_steps": 37885, "loss": 0.0011, "lr": 1.8170207147239636e-06, "epoch": 1.3804935990497558, "percentage": 27.61, "elapsed_time": "0:15:07", "remaining_time": "0:39:38", "throughput": 5667.23, "total_tokens": 5141632} +{"current_steps": 10465, "total_steps": 37885, "loss": 0.0696, "lr": 1.8167549856593374e-06, "epoch": 1.3811534908275043, "percentage": 27.62, "elapsed_time": "0:15:07", "remaining_time": "0:39:38", "throughput": 5668.1, "total_tokens": 5144320} +{"current_steps": 10470, "total_steps": 37885, "loss": 0.2092, "lr": 1.81648908324438e-06, "epoch": 1.3818133826052528, "percentage": 27.64, "elapsed_time": "0:15:07", "remaining_time": "0:39:37", "throughput": 5668.85, "total_tokens": 5146880} +{"current_steps": 10475, "total_steps": 37885, "loss": 0.0023, "lr": 1.8162230075355277e-06, "epoch": 1.3824732743830013, "percentage": 27.65, "elapsed_time": "0:15:08", "remaining_time": "0:39:36", "throughput": 5669.8, "total_tokens": 5149632} +{"current_steps": 10480, "total_steps": 37885, "loss": 0.0611, "lr": 1.8159567585892521e-06, "epoch": 1.3831331661607495, "percentage": 27.66, "elapsed_time": "0:15:08", "remaining_time": "0:39:35", "throughput": 5670.3, "total_tokens": 5151936} +{"current_steps": 10485, "total_steps": 37885, "loss": 0.2547, "lr": 1.8156903364620632e-06, "epoch": 1.383793057938498, "percentage": 27.68, "elapsed_time": "0:15:08", "remaining_time": "0:39:35", "throughput": 5670.91, "total_tokens": 5154368} +{"current_steps": 10490, "total_steps": 37885, "loss": 0.0018, "lr": 1.8154237412105074e-06, "epoch": 1.3844529497162466, "percentage": 27.69, "elapsed_time": "0:15:09", "remaining_time": "0:39:34", "throughput": 5671.45, "total_tokens": 5156736} +{"current_steps": 10495, "total_steps": 37885, "loss": 0.203, "lr": 1.8151569728911672e-06, "epoch": 1.385112841493995, "percentage": 27.7, "elapsed_time": "0:15:09", "remaining_time": "0:39:33", "throughput": 5672.01, "total_tokens": 5159104} +{"current_steps": 10500, "total_steps": 37885, "loss": 0.1597, "lr": 1.8148900315606625e-06, "epoch": 1.3857727332717436, "percentage": 27.72, "elapsed_time": "0:15:09", "remaining_time": "0:39:33", "throughput": 5672.55, "total_tokens": 5161472} +{"current_steps": 10505, "total_steps": 37885, "loss": 0.0015, "lr": 1.8146229172756495e-06, "epoch": 1.3864326250494918, "percentage": 27.73, "elapsed_time": "0:15:10", "remaining_time": "0:39:32", "throughput": 5673.17, "total_tokens": 5163904} +{"current_steps": 10510, "total_steps": 37885, "loss": 0.0844, "lr": 1.8143556300928214e-06, "epoch": 1.3870925168272403, "percentage": 27.74, "elapsed_time": "0:15:10", "remaining_time": "0:39:31", "throughput": 5673.91, "total_tokens": 5166464} +{"current_steps": 10515, "total_steps": 37885, "loss": 0.0516, "lr": 1.814088170068908e-06, "epoch": 1.3877524086049888, "percentage": 27.76, "elapsed_time": "0:15:10", "remaining_time": "0:39:31", "throughput": 5674.33, "total_tokens": 5168704} +{"current_steps": 10520, "total_steps": 37885, "loss": 0.0833, "lr": 1.8138205372606756e-06, "epoch": 1.388412300382737, "percentage": 27.77, "elapsed_time": "0:15:11", "remaining_time": "0:39:30", "throughput": 5675.01, "total_tokens": 5171200} +{"current_steps": 10525, "total_steps": 37885, "loss": 0.0015, "lr": 1.8135527317249273e-06, "epoch": 1.3890721921604856, "percentage": 27.78, "elapsed_time": "0:15:11", "remaining_time": "0:39:29", "throughput": 5675.5, "total_tokens": 5173504} +{"current_steps": 10530, "total_steps": 37885, "loss": 0.0479, "lr": 1.8132847535185029e-06, "epoch": 1.389732083938234, "percentage": 27.79, "elapsed_time": "0:15:11", "remaining_time": "0:39:28", "throughput": 5676.24, "total_tokens": 5176064} +{"current_steps": 10535, "total_steps": 37885, "loss": 0.0021, "lr": 1.8130166026982795e-06, "epoch": 1.3903919757159826, "percentage": 27.81, "elapsed_time": "0:15:12", "remaining_time": "0:39:28", "throughput": 5677.18, "total_tokens": 5178816} +{"current_steps": 10540, "total_steps": 37885, "loss": 0.0802, "lr": 1.8127482793211688e-06, "epoch": 1.391051867493731, "percentage": 27.82, "elapsed_time": "0:15:12", "remaining_time": "0:39:27", "throughput": 5677.8, "total_tokens": 5181248} +{"current_steps": 10545, "total_steps": 37885, "loss": 0.0009, "lr": 1.8124797834441217e-06, "epoch": 1.3917117592714794, "percentage": 27.83, "elapsed_time": "0:15:12", "remaining_time": "0:39:26", "throughput": 5678.3, "total_tokens": 5183552} +{"current_steps": 10550, "total_steps": 37885, "loss": 0.0769, "lr": 1.812211115124124e-06, "epoch": 1.3923716510492279, "percentage": 27.85, "elapsed_time": "0:15:13", "remaining_time": "0:39:26", "throughput": 5678.65, "total_tokens": 5185728} +{"current_steps": 10555, "total_steps": 37885, "loss": 0.0521, "lr": 1.8119422744181984e-06, "epoch": 1.3930315428269764, "percentage": 27.86, "elapsed_time": "0:15:13", "remaining_time": "0:39:25", "throughput": 5679.32, "total_tokens": 5188224} +{"current_steps": 10560, "total_steps": 37885, "loss": 0.1086, "lr": 1.8116732613834053e-06, "epoch": 1.3936914346047249, "percentage": 27.87, "elapsed_time": "0:15:13", "remaining_time": "0:39:24", "throughput": 5680.38, "total_tokens": 5191104} +{"current_steps": 10565, "total_steps": 37885, "loss": 0.1069, "lr": 1.81140407607684e-06, "epoch": 1.3943513263824734, "percentage": 27.89, "elapsed_time": "0:15:14", "remaining_time": "0:39:24", "throughput": 5681.08, "total_tokens": 5193600} +{"current_steps": 10570, "total_steps": 37885, "loss": 0.0569, "lr": 1.8111347185556348e-06, "epoch": 1.3950112181602217, "percentage": 27.9, "elapsed_time": "0:15:14", "remaining_time": "0:39:23", "throughput": 5681.69, "total_tokens": 5196032} +{"current_steps": 10575, "total_steps": 37885, "loss": 0.0003, "lr": 1.8108651888769595e-06, "epoch": 1.3956711099379702, "percentage": 27.91, "elapsed_time": "0:15:14", "remaining_time": "0:39:22", "throughput": 5682.51, "total_tokens": 5198656} +{"current_steps": 10580, "total_steps": 37885, "loss": 0.146, "lr": 1.8105954870980198e-06, "epoch": 1.3963310017157187, "percentage": 27.93, "elapsed_time": "0:15:15", "remaining_time": "0:39:21", "throughput": 5682.98, "total_tokens": 5200960} +{"current_steps": 10585, "total_steps": 37885, "loss": 0.0001, "lr": 1.810325613276058e-06, "epoch": 1.396990893493467, "percentage": 27.94, "elapsed_time": "0:15:15", "remaining_time": "0:39:21", "throughput": 5683.73, "total_tokens": 5203520} +{"current_steps": 10590, "total_steps": 37885, "loss": 0.0006, "lr": 1.8100555674683524e-06, "epoch": 1.3976507852712154, "percentage": 27.95, "elapsed_time": "0:15:15", "remaining_time": "0:39:20", "throughput": 5684.54, "total_tokens": 5206144} +{"current_steps": 10595, "total_steps": 37885, "loss": 0.0002, "lr": 1.8097853497322188e-06, "epoch": 1.398310677048964, "percentage": 27.97, "elapsed_time": "0:15:16", "remaining_time": "0:39:19", "throughput": 5685.33, "total_tokens": 5208768} +{"current_steps": 10600, "total_steps": 37885, "loss": 0.0942, "lr": 1.8095149601250088e-06, "epoch": 1.3989705688267124, "percentage": 27.98, "elapsed_time": "0:15:16", "remaining_time": "0:39:19", "throughput": 5685.87, "total_tokens": 5211136} +{"current_steps": 10605, "total_steps": 37885, "loss": 0.066, "lr": 1.8092443987041104e-06, "epoch": 1.399630460604461, "percentage": 27.99, "elapsed_time": "0:15:16", "remaining_time": "0:39:18", "throughput": 5686.43, "total_tokens": 5213504} +{"current_steps": 10610, "total_steps": 37885, "loss": 0.0834, "lr": 1.8089736655269486e-06, "epoch": 1.4002903523822092, "percentage": 28.01, "elapsed_time": "0:15:17", "remaining_time": "0:39:17", "throughput": 5687.11, "total_tokens": 5216000} +{"current_steps": 10615, "total_steps": 37885, "loss": 0.14, "lr": 1.8087027606509842e-06, "epoch": 1.4009502441599577, "percentage": 28.02, "elapsed_time": "0:15:17", "remaining_time": "0:39:17", "throughput": 5687.97, "total_tokens": 5218688} +{"current_steps": 10620, "total_steps": 37885, "loss": 0.0022, "lr": 1.808431684133715e-06, "epoch": 1.4016101359377062, "percentage": 28.03, "elapsed_time": "0:15:17", "remaining_time": "0:39:16", "throughput": 5688.9, "total_tokens": 5221440} +{"current_steps": 10625, "total_steps": 37885, "loss": 0.1496, "lr": 1.8081604360326753e-06, "epoch": 1.4022700277154547, "percentage": 28.05, "elapsed_time": "0:15:18", "remaining_time": "0:39:15", "throughput": 5689.23, "total_tokens": 5223616} +{"current_steps": 10630, "total_steps": 37885, "loss": 0.0769, "lr": 1.807889016405435e-06, "epoch": 1.4029299194932032, "percentage": 28.06, "elapsed_time": "0:15:18", "remaining_time": "0:39:14", "throughput": 5689.97, "total_tokens": 5226176} +{"current_steps": 10635, "total_steps": 37885, "loss": 0.0013, "lr": 1.8076174253096014e-06, "epoch": 1.4035898112709515, "percentage": 28.07, "elapsed_time": "0:15:18", "remaining_time": "0:39:14", "throughput": 5690.45, "total_tokens": 5228480} +{"current_steps": 10640, "total_steps": 37885, "loss": 0.1115, "lr": 1.8073456628028177e-06, "epoch": 1.4042497030487, "percentage": 28.08, "elapsed_time": "0:15:19", "remaining_time": "0:39:13", "throughput": 5691.08, "total_tokens": 5230912} +{"current_steps": 10645, "total_steps": 37885, "loss": 0.0883, "lr": 1.8070737289427631e-06, "epoch": 1.4049095948264485, "percentage": 28.1, "elapsed_time": "0:15:19", "remaining_time": "0:39:12", "throughput": 5691.87, "total_tokens": 5233536} +{"current_steps": 10650, "total_steps": 37885, "loss": 0.0003, "lr": 1.8068016237871541e-06, "epoch": 1.4055694866041968, "percentage": 28.11, "elapsed_time": "0:15:19", "remaining_time": "0:39:12", "throughput": 5692.61, "total_tokens": 5236096} +{"current_steps": 10655, "total_steps": 37885, "loss": 0.1611, "lr": 1.8065293473937429e-06, "epoch": 1.4062293783819453, "percentage": 28.12, "elapsed_time": "0:15:20", "remaining_time": "0:39:11", "throughput": 5693.16, "total_tokens": 5238464} +{"current_steps": 10660, "total_steps": 37885, "loss": 0.3064, "lr": 1.806256899820318e-06, "epoch": 1.4068892701596938, "percentage": 28.14, "elapsed_time": "0:15:20", "remaining_time": "0:39:10", "throughput": 5693.96, "total_tokens": 5241088} +{"current_steps": 10665, "total_steps": 37885, "loss": 0.2462, "lr": 1.8059842811247048e-06, "epoch": 1.4075491619374423, "percentage": 28.15, "elapsed_time": "0:15:20", "remaining_time": "0:39:10", "throughput": 5694.63, "total_tokens": 5243584} +{"current_steps": 10670, "total_steps": 37885, "loss": 0.0303, "lr": 1.805711491364764e-06, "epoch": 1.4082090537151908, "percentage": 28.16, "elapsed_time": "0:15:21", "remaining_time": "0:39:09", "throughput": 5695.23, "total_tokens": 5246016} +{"current_steps": 10675, "total_steps": 37885, "loss": 0.0026, "lr": 1.8054385305983942e-06, "epoch": 1.408868945492939, "percentage": 28.18, "elapsed_time": "0:15:21", "remaining_time": "0:39:08", "throughput": 5695.58, "total_tokens": 5248192} +{"current_steps": 10680, "total_steps": 37885, "loss": 0.1616, "lr": 1.8051653988835284e-06, "epoch": 1.4095288372706876, "percentage": 28.19, "elapsed_time": "0:15:21", "remaining_time": "0:39:08", "throughput": 5696.31, "total_tokens": 5250752} +{"current_steps": 10685, "total_steps": 37885, "loss": 0.1854, "lr": 1.8048920962781372e-06, "epoch": 1.410188729048436, "percentage": 28.2, "elapsed_time": "0:15:22", "remaining_time": "0:39:07", "throughput": 5696.87, "total_tokens": 5253120} +{"current_steps": 10690, "total_steps": 37885, "loss": 0.0559, "lr": 1.8046186228402273e-06, "epoch": 1.4108486208261846, "percentage": 28.22, "elapsed_time": "0:15:22", "remaining_time": "0:39:06", "throughput": 5697.72, "total_tokens": 5255808} +{"current_steps": 10695, "total_steps": 37885, "loss": 0.0009, "lr": 1.8043449786278413e-06, "epoch": 1.411508512603933, "percentage": 28.23, "elapsed_time": "0:15:22", "remaining_time": "0:39:05", "throughput": 5698.19, "total_tokens": 5258112} +{"current_steps": 10700, "total_steps": 37885, "loss": 0.0902, "lr": 1.8040711636990581e-06, "epoch": 1.4121684043816813, "percentage": 28.24, "elapsed_time": "0:15:23", "remaining_time": "0:39:05", "throughput": 5699.05, "total_tokens": 5260800} +{"current_steps": 10705, "total_steps": 37885, "loss": 0.0008, "lr": 1.8037971781119931e-06, "epoch": 1.4128282961594298, "percentage": 28.26, "elapsed_time": "0:15:23", "remaining_time": "0:39:04", "throughput": 5699.52, "total_tokens": 5263104} +{"current_steps": 10710, "total_steps": 37885, "loss": 0.204, "lr": 1.8035230219247977e-06, "epoch": 1.4134881879371783, "percentage": 28.27, "elapsed_time": "0:15:23", "remaining_time": "0:39:03", "throughput": 5700.05, "total_tokens": 5265472} +{"current_steps": 10715, "total_steps": 37885, "loss": 0.0555, "lr": 1.8032486951956596e-06, "epoch": 1.4141480797149266, "percentage": 28.28, "elapsed_time": "0:15:24", "remaining_time": "0:39:03", "throughput": 5700.91, "total_tokens": 5268160} +{"current_steps": 10720, "total_steps": 37885, "loss": 0.115, "lr": 1.8029741979828026e-06, "epoch": 1.4148079714926751, "percentage": 28.3, "elapsed_time": "0:15:24", "remaining_time": "0:39:02", "throughput": 5701.33, "total_tokens": 5270400} +{"current_steps": 10725, "total_steps": 37885, "loss": 0.0623, "lr": 1.8026995303444867e-06, "epoch": 1.4154678632704236, "percentage": 28.31, "elapsed_time": "0:15:24", "remaining_time": "0:39:01", "throughput": 5701.88, "total_tokens": 5272768} +{"current_steps": 10730, "total_steps": 37885, "loss": 0.0005, "lr": 1.802424692339008e-06, "epoch": 1.4161277550481721, "percentage": 28.32, "elapsed_time": "0:15:25", "remaining_time": "0:39:01", "throughput": 5702.88, "total_tokens": 5275584} +{"current_steps": 10735, "total_steps": 37885, "loss": 0.0371, "lr": 1.8021496840246994e-06, "epoch": 1.4167876468259206, "percentage": 28.34, "elapsed_time": "0:15:25", "remaining_time": "0:39:00", "throughput": 5703.29, "total_tokens": 5277824} +{"current_steps": 10740, "total_steps": 37885, "loss": 0.0004, "lr": 1.8018745054599292e-06, "epoch": 1.417447538603669, "percentage": 28.35, "elapsed_time": "0:15:25", "remaining_time": "0:38:59", "throughput": 5704.17, "total_tokens": 5280512} +{"current_steps": 10745, "total_steps": 37885, "loss": 0.0006, "lr": 1.8015991567031015e-06, "epoch": 1.4181074303814174, "percentage": 28.36, "elapsed_time": "0:15:26", "remaining_time": "0:38:59", "throughput": 5704.93, "total_tokens": 5283136} +{"current_steps": 10750, "total_steps": 37885, "loss": 0.0802, "lr": 1.8013236378126577e-06, "epoch": 1.418767322159166, "percentage": 28.38, "elapsed_time": "0:15:26", "remaining_time": "0:38:58", "throughput": 5705.52, "total_tokens": 5285568} +{"current_steps": 10755, "total_steps": 37885, "loss": 0.0573, "lr": 1.8010479488470743e-06, "epoch": 1.4194272139369144, "percentage": 28.39, "elapsed_time": "0:15:26", "remaining_time": "0:38:57", "throughput": 5706.07, "total_tokens": 5287936} +{"current_steps": 10760, "total_steps": 37885, "loss": 0.0006, "lr": 1.8007720898648645e-06, "epoch": 1.420087105714663, "percentage": 28.4, "elapsed_time": "0:15:27", "remaining_time": "0:38:57", "throughput": 5706.27, "total_tokens": 5289984} +{"current_steps": 10765, "total_steps": 37885, "loss": 0.1727, "lr": 1.8004960609245778e-06, "epoch": 1.4207469974924112, "percentage": 28.41, "elapsed_time": "0:15:27", "remaining_time": "0:38:56", "throughput": 5706.8, "total_tokens": 5292352} +{"current_steps": 10770, "total_steps": 37885, "loss": 0.0008, "lr": 1.8002198620847988e-06, "epoch": 1.4214068892701597, "percentage": 28.43, "elapsed_time": "0:15:27", "remaining_time": "0:38:55", "throughput": 5707.35, "total_tokens": 5294720} +{"current_steps": 10775, "total_steps": 37885, "loss": 0.0007, "lr": 1.7999434934041485e-06, "epoch": 1.4220667810479082, "percentage": 28.44, "elapsed_time": "0:15:28", "remaining_time": "0:38:54", "throughput": 5707.81, "total_tokens": 5297024} +{"current_steps": 10780, "total_steps": 37885, "loss": 0.0005, "lr": 1.7996669549412847e-06, "epoch": 1.4227266728256565, "percentage": 28.45, "elapsed_time": "0:15:28", "remaining_time": "0:38:54", "throughput": 5708.53, "total_tokens": 5299584} +{"current_steps": 10785, "total_steps": 37885, "loss": 0.1082, "lr": 1.7993902467549002e-06, "epoch": 1.4233865646034052, "percentage": 28.47, "elapsed_time": "0:15:28", "remaining_time": "0:38:53", "throughput": 5709.01, "total_tokens": 5301888} +{"current_steps": 10790, "total_steps": 37885, "loss": 0.1271, "lr": 1.7991133689037247e-06, "epoch": 1.4240464563811535, "percentage": 28.48, "elapsed_time": "0:15:29", "remaining_time": "0:38:52", "throughput": 5709.56, "total_tokens": 5304256} +{"current_steps": 10795, "total_steps": 37885, "loss": 0.1573, "lr": 1.7988363214465233e-06, "epoch": 1.424706348158902, "percentage": 28.49, "elapsed_time": "0:15:29", "remaining_time": "0:38:52", "throughput": 5710.16, "total_tokens": 5306688} +{"current_steps": 10800, "total_steps": 37885, "loss": 0.0624, "lr": 1.7985591044420975e-06, "epoch": 1.4253662399366505, "percentage": 28.51, "elapsed_time": "0:15:29", "remaining_time": "0:38:51", "throughput": 5710.89, "total_tokens": 5309248} +{"current_steps": 10805, "total_steps": 37885, "loss": 0.001, "lr": 1.7982817179492847e-06, "epoch": 1.4260261317143987, "percentage": 28.52, "elapsed_time": "0:15:29", "remaining_time": "0:38:50", "throughput": 5711.36, "total_tokens": 5311552} +{"current_steps": 10810, "total_steps": 37885, "loss": 0.0589, "lr": 1.7980041620269577e-06, "epoch": 1.4266860234921472, "percentage": 28.53, "elapsed_time": "0:15:30", "remaining_time": "0:38:50", "throughput": 5712.01, "total_tokens": 5314048} +{"current_steps": 10815, "total_steps": 37885, "loss": 0.0665, "lr": 1.7977264367340262e-06, "epoch": 1.4273459152698957, "percentage": 28.55, "elapsed_time": "0:15:30", "remaining_time": "0:38:49", "throughput": 5712.58, "total_tokens": 5316480} +{"current_steps": 10820, "total_steps": 37885, "loss": 0.0012, "lr": 1.7974485421294347e-06, "epoch": 1.4280058070476442, "percentage": 28.56, "elapsed_time": "0:15:30", "remaining_time": "0:38:48", "throughput": 5712.93, "total_tokens": 5318720} +{"current_steps": 10825, "total_steps": 37885, "loss": 0.0552, "lr": 1.7971704782721652e-06, "epoch": 1.4286656988253927, "percentage": 28.57, "elapsed_time": "0:15:31", "remaining_time": "0:38:48", "throughput": 5713.68, "total_tokens": 5321344} +{"current_steps": 10830, "total_steps": 37885, "loss": 0.1752, "lr": 1.7968922452212342e-06, "epoch": 1.429325590603141, "percentage": 28.59, "elapsed_time": "0:15:31", "remaining_time": "0:38:47", "throughput": 5714.05, "total_tokens": 5323584} +{"current_steps": 10835, "total_steps": 37885, "loss": 0.2167, "lr": 1.796613843035695e-06, "epoch": 1.4299854823808895, "percentage": 28.6, "elapsed_time": "0:15:32", "remaining_time": "0:38:46", "throughput": 5714.8, "total_tokens": 5326208} +{"current_steps": 10840, "total_steps": 37885, "loss": 0.0647, "lr": 1.796335271774636e-06, "epoch": 1.430645374158638, "percentage": 28.61, "elapsed_time": "0:15:32", "remaining_time": "0:38:46", "throughput": 5715.48, "total_tokens": 5328768} +{"current_steps": 10845, "total_steps": 37885, "loss": 0.1761, "lr": 1.7960565314971823e-06, "epoch": 1.4313052659363863, "percentage": 28.63, "elapsed_time": "0:15:32", "remaining_time": "0:38:45", "throughput": 5716.06, "total_tokens": 5331264} +{"current_steps": 10850, "total_steps": 37885, "loss": 0.0024, "lr": 1.7957776222624946e-06, "epoch": 1.431965157714135, "percentage": 28.64, "elapsed_time": "0:15:33", "remaining_time": "0:38:44", "throughput": 5716.53, "total_tokens": 5333632} +{"current_steps": 10855, "total_steps": 37885, "loss": 0.001, "lr": 1.7954985441297684e-06, "epoch": 1.4326250494918833, "percentage": 28.65, "elapsed_time": "0:15:33", "remaining_time": "0:38:44", "throughput": 5717.19, "total_tokens": 5336192} +{"current_steps": 10860, "total_steps": 37885, "loss": 0.0111, "lr": 1.7952192971582374e-06, "epoch": 1.4332849412696318, "percentage": 28.67, "elapsed_time": "0:15:33", "remaining_time": "0:38:43", "throughput": 5717.58, "total_tokens": 5338496} +{"current_steps": 10865, "total_steps": 37885, "loss": 0.0531, "lr": 1.794939881407169e-06, "epoch": 1.4339448330473803, "percentage": 28.68, "elapsed_time": "0:15:34", "remaining_time": "0:38:42", "throughput": 5718.16, "total_tokens": 5340992} +{"current_steps": 10870, "total_steps": 37885, "loss": 0.0015, "lr": 1.7946602969358673e-06, "epoch": 1.4346047248251286, "percentage": 28.69, "elapsed_time": "0:15:34", "remaining_time": "0:38:42", "throughput": 5718.79, "total_tokens": 5343552} +{"current_steps": 10875, "total_steps": 37885, "loss": 0.0022, "lr": 1.7943805438036718e-06, "epoch": 1.435264616602877, "percentage": 28.71, "elapsed_time": "0:15:34", "remaining_time": "0:38:41", "throughput": 5719.48, "total_tokens": 5346176} +{"current_steps": 10880, "total_steps": 37885, "loss": 0.0216, "lr": 1.7941006220699588e-06, "epoch": 1.4359245083806256, "percentage": 28.72, "elapsed_time": "0:15:35", "remaining_time": "0:38:40", "throughput": 5720.2, "total_tokens": 5348800} +{"current_steps": 10885, "total_steps": 37885, "loss": 0.0736, "lr": 1.7938205317941386e-06, "epoch": 1.436584400158374, "percentage": 28.73, "elapsed_time": "0:15:35", "remaining_time": "0:38:40", "throughput": 5720.91, "total_tokens": 5351424} +{"current_steps": 10890, "total_steps": 37885, "loss": 0.0003, "lr": 1.7935402730356594e-06, "epoch": 1.4372442919361226, "percentage": 28.74, "elapsed_time": "0:15:35", "remaining_time": "0:38:39", "throughput": 5721.63, "total_tokens": 5354048} +{"current_steps": 10895, "total_steps": 37885, "loss": 0.0753, "lr": 1.7932598458540036e-06, "epoch": 1.4379041837138709, "percentage": 28.76, "elapsed_time": "0:15:36", "remaining_time": "0:38:38", "throughput": 5722.09, "total_tokens": 5356416} +{"current_steps": 10900, "total_steps": 37885, "loss": 0.03, "lr": 1.7929792503086897e-06, "epoch": 1.4385640754916194, "percentage": 28.77, "elapsed_time": "0:15:36", "remaining_time": "0:38:38", "throughput": 5722.65, "total_tokens": 5358848} +{"current_steps": 10905, "total_steps": 37885, "loss": 0.0655, "lr": 1.792698486459272e-06, "epoch": 1.4392239672693679, "percentage": 28.78, "elapsed_time": "0:15:36", "remaining_time": "0:38:37", "throughput": 5723.27, "total_tokens": 5361344} +{"current_steps": 10910, "total_steps": 37885, "loss": 0.0519, "lr": 1.7924175543653411e-06, "epoch": 1.4398838590471164, "percentage": 28.8, "elapsed_time": "0:15:37", "remaining_time": "0:38:36", "throughput": 5723.94, "total_tokens": 5363904} +{"current_steps": 10915, "total_steps": 37885, "loss": 0.0833, "lr": 1.7921364540865224e-06, "epoch": 1.4405437508248649, "percentage": 28.81, "elapsed_time": "0:15:37", "remaining_time": "0:38:36", "throughput": 5724.3, "total_tokens": 5366144} +{"current_steps": 10920, "total_steps": 37885, "loss": 0.0007, "lr": 1.7918551856824776e-06, "epoch": 1.4412036426026131, "percentage": 28.82, "elapsed_time": "0:15:37", "remaining_time": "0:38:35", "throughput": 5724.72, "total_tokens": 5368448} +{"current_steps": 10925, "total_steps": 37885, "loss": 0.1, "lr": 1.7915737492129037e-06, "epoch": 1.4418635343803616, "percentage": 28.84, "elapsed_time": "0:15:38", "remaining_time": "0:38:34", "throughput": 5724.99, "total_tokens": 5370624} +{"current_steps": 10930, "total_steps": 37885, "loss": 0.0986, "lr": 1.7912921447375338e-06, "epoch": 1.4425234261581101, "percentage": 28.85, "elapsed_time": "0:15:38", "remaining_time": "0:38:34", "throughput": 5725.83, "total_tokens": 5373376} +{"current_steps": 10935, "total_steps": 37885, "loss": 0.1395, "lr": 1.7910103723161362e-06, "epoch": 1.4431833179358584, "percentage": 28.86, "elapsed_time": "0:15:38", "remaining_time": "0:38:33", "throughput": 5726.59, "total_tokens": 5376064} +{"current_steps": 10940, "total_steps": 37885, "loss": 0.0167, "lr": 1.7907284320085153e-06, "epoch": 1.443843209713607, "percentage": 28.88, "elapsed_time": "0:15:39", "remaining_time": "0:38:33", "throughput": 5727.24, "total_tokens": 5378624} +{"current_steps": 10945, "total_steps": 37885, "loss": 0.1895, "lr": 1.7904463238745105e-06, "epoch": 1.4445031014913554, "percentage": 28.89, "elapsed_time": "0:15:39", "remaining_time": "0:38:32", "throughput": 5728.0, "total_tokens": 5381312} +{"current_steps": 10950, "total_steps": 37885, "loss": 0.1904, "lr": 1.7901640479739974e-06, "epoch": 1.445162993269104, "percentage": 28.9, "elapsed_time": "0:15:39", "remaining_time": "0:38:31", "throughput": 5728.4, "total_tokens": 5383616} +{"current_steps": 10955, "total_steps": 37885, "loss": 0.1212, "lr": 1.789881604366887e-06, "epoch": 1.4458228850468524, "percentage": 28.92, "elapsed_time": "0:15:40", "remaining_time": "0:38:31", "throughput": 5729.24, "total_tokens": 5386368} +{"current_steps": 10960, "total_steps": 37885, "loss": 0.1056, "lr": 1.7895989931131262e-06, "epoch": 1.4464827768246007, "percentage": 28.93, "elapsed_time": "0:15:40", "remaining_time": "0:38:30", "throughput": 5729.7, "total_tokens": 5388736} +{"current_steps": 10965, "total_steps": 37885, "loss": 0.0437, "lr": 1.7893162142726967e-06, "epoch": 1.4471426686023492, "percentage": 28.94, "elapsed_time": "0:15:40", "remaining_time": "0:38:29", "throughput": 5730.27, "total_tokens": 5391232} +{"current_steps": 10970, "total_steps": 37885, "loss": 0.0009, "lr": 1.7890332679056165e-06, "epoch": 1.4478025603800977, "percentage": 28.96, "elapsed_time": "0:15:41", "remaining_time": "0:38:29", "throughput": 5730.89, "total_tokens": 5393792} +{"current_steps": 10975, "total_steps": 37885, "loss": 0.1911, "lr": 1.7887501540719389e-06, "epoch": 1.4484624521578462, "percentage": 28.97, "elapsed_time": "0:15:41", "remaining_time": "0:38:28", "throughput": 5731.6, "total_tokens": 5396416} +{"current_steps": 10980, "total_steps": 37885, "loss": 0.0005, "lr": 1.7884668728317531e-06, "epoch": 1.4491223439355947, "percentage": 28.98, "elapsed_time": "0:15:41", "remaining_time": "0:38:27", "throughput": 5732.49, "total_tokens": 5399232} +{"current_steps": 10985, "total_steps": 37885, "loss": 0.0615, "lr": 1.7881834242451829e-06, "epoch": 1.449782235713343, "percentage": 29.0, "elapsed_time": "0:15:42", "remaining_time": "0:38:27", "throughput": 5733.01, "total_tokens": 5401664} +{"current_steps": 10990, "total_steps": 37885, "loss": 0.001, "lr": 1.7878998083723883e-06, "epoch": 1.4504421274910915, "percentage": 29.01, "elapsed_time": "0:15:42", "remaining_time": "0:38:26", "throughput": 5733.67, "total_tokens": 5404224} +{"current_steps": 10995, "total_steps": 37885, "loss": 0.1332, "lr": 1.7876160252735652e-06, "epoch": 1.45110201926884, "percentage": 29.02, "elapsed_time": "0:15:42", "remaining_time": "0:38:25", "throughput": 5733.87, "total_tokens": 5406336} +{"current_steps": 11000, "total_steps": 37885, "loss": 0.0553, "lr": 1.7873320750089443e-06, "epoch": 1.4517619110465882, "percentage": 29.04, "elapsed_time": "0:15:43", "remaining_time": "0:38:25", "throughput": 5734.46, "total_tokens": 5408832} +{"current_steps": 11005, "total_steps": 37885, "loss": 0.0357, "lr": 1.7870479576387916e-06, "epoch": 1.4524218028243367, "percentage": 29.05, "elapsed_time": "0:15:43", "remaining_time": "0:38:24", "throughput": 5734.84, "total_tokens": 5411136} +{"current_steps": 11010, "total_steps": 37885, "loss": 0.1594, "lr": 1.7867636732234094e-06, "epoch": 1.4530816946020852, "percentage": 29.06, "elapsed_time": "0:15:43", "remaining_time": "0:38:24", "throughput": 5735.16, "total_tokens": 5413376} +{"current_steps": 11015, "total_steps": 37885, "loss": 0.0837, "lr": 1.7864792218231348e-06, "epoch": 1.4537415863798338, "percentage": 29.07, "elapsed_time": "0:15:44", "remaining_time": "0:38:23", "throughput": 5735.54, "total_tokens": 5415680} +{"current_steps": 11020, "total_steps": 37885, "loss": 0.066, "lr": 1.7861946034983406e-06, "epoch": 1.4544014781575823, "percentage": 29.09, "elapsed_time": "0:15:44", "remaining_time": "0:38:22", "throughput": 5736.05, "total_tokens": 5418112} +{"current_steps": 11025, "total_steps": 37885, "loss": 0.0883, "lr": 1.785909818309435e-06, "epoch": 1.4550613699353305, "percentage": 29.1, "elapsed_time": "0:15:44", "remaining_time": "0:38:22", "throughput": 5736.36, "total_tokens": 5420352} +{"current_steps": 11030, "total_steps": 37885, "loss": 0.1528, "lr": 1.7856248663168616e-06, "epoch": 1.455721261713079, "percentage": 29.11, "elapsed_time": "0:15:45", "remaining_time": "0:38:21", "throughput": 5736.81, "total_tokens": 5422720} +{"current_steps": 11035, "total_steps": 37885, "loss": 0.1301, "lr": 1.7853397475810995e-06, "epoch": 1.4563811534908275, "percentage": 29.13, "elapsed_time": "0:15:45", "remaining_time": "0:38:20", "throughput": 5737.26, "total_tokens": 5425024} +{"current_steps": 11040, "total_steps": 37885, "loss": 0.0646, "lr": 1.7850544621626626e-06, "epoch": 1.457041045268576, "percentage": 29.14, "elapsed_time": "0:15:45", "remaining_time": "0:38:20", "throughput": 5737.95, "total_tokens": 5427584} +{"current_steps": 11045, "total_steps": 37885, "loss": 0.2122, "lr": 1.7847690101221011e-06, "epoch": 1.4577009370463245, "percentage": 29.15, "elapsed_time": "0:15:46", "remaining_time": "0:38:19", "throughput": 5738.9, "total_tokens": 5430400} +{"current_steps": 11050, "total_steps": 37885, "loss": 0.0005, "lr": 1.7844833915200001e-06, "epoch": 1.4583608288240728, "percentage": 29.17, "elapsed_time": "0:15:46", "remaining_time": "0:38:18", "throughput": 5739.6, "total_tokens": 5432960} +{"current_steps": 11055, "total_steps": 37885, "loss": 0.0658, "lr": 1.7841976064169803e-06, "epoch": 1.4590207206018213, "percentage": 29.18, "elapsed_time": "0:15:46", "remaining_time": "0:38:18", "throughput": 5740.28, "total_tokens": 5435520} +{"current_steps": 11060, "total_steps": 37885, "loss": 0.1915, "lr": 1.7839116548736972e-06, "epoch": 1.4596806123795698, "percentage": 29.19, "elapsed_time": "0:15:47", "remaining_time": "0:38:17", "throughput": 5740.91, "total_tokens": 5438016} +{"current_steps": 11065, "total_steps": 37885, "loss": 0.0019, "lr": 1.7836255369508418e-06, "epoch": 1.460340504157318, "percentage": 29.21, "elapsed_time": "0:15:47", "remaining_time": "0:38:16", "throughput": 5741.44, "total_tokens": 5440384} +{"current_steps": 11070, "total_steps": 37885, "loss": 0.0514, "lr": 1.7833392527091409e-06, "epoch": 1.4610003959350666, "percentage": 29.22, "elapsed_time": "0:15:47", "remaining_time": "0:38:16", "throughput": 5742.24, "total_tokens": 5443072} +{"current_steps": 11075, "total_steps": 37885, "loss": 0.1296, "lr": 1.7830528022093559e-06, "epoch": 1.461660287712815, "percentage": 29.23, "elapsed_time": "0:15:48", "remaining_time": "0:38:15", "throughput": 5743.06, "total_tokens": 5445760} +{"current_steps": 11080, "total_steps": 37885, "loss": 0.0006, "lr": 1.7827661855122842e-06, "epoch": 1.4623201794905636, "percentage": 29.25, "elapsed_time": "0:15:48", "remaining_time": "0:38:14", "throughput": 5743.62, "total_tokens": 5448192} +{"current_steps": 11085, "total_steps": 37885, "loss": 0.0479, "lr": 1.7824794026787577e-06, "epoch": 1.462980071268312, "percentage": 29.26, "elapsed_time": "0:15:48", "remaining_time": "0:38:14", "throughput": 5744.31, "total_tokens": 5450752} +{"current_steps": 11090, "total_steps": 37885, "loss": 0.0038, "lr": 1.7821924537696447e-06, "epoch": 1.4636399630460604, "percentage": 29.27, "elapsed_time": "0:15:49", "remaining_time": "0:38:13", "throughput": 5744.76, "total_tokens": 5453056} +{"current_steps": 11095, "total_steps": 37885, "loss": 0.0636, "lr": 1.7819053388458474e-06, "epoch": 1.4642998548238089, "percentage": 29.29, "elapsed_time": "0:15:49", "remaining_time": "0:38:12", "throughput": 5745.63, "total_tokens": 5455808} +{"current_steps": 11100, "total_steps": 37885, "loss": 0.1838, "lr": 1.781618057968304e-06, "epoch": 1.4649597466015574, "percentage": 29.3, "elapsed_time": "0:15:49", "remaining_time": "0:38:12", "throughput": 5746.18, "total_tokens": 5458240} +{"current_steps": 11105, "total_steps": 37885, "loss": 0.0005, "lr": 1.7813306111979878e-06, "epoch": 1.4656196383793059, "percentage": 29.31, "elapsed_time": "0:15:50", "remaining_time": "0:38:11", "throughput": 5746.57, "total_tokens": 5460480} +{"current_steps": 11110, "total_steps": 37885, "loss": 0.1695, "lr": 1.7810429985959075e-06, "epoch": 1.4662795301570544, "percentage": 29.33, "elapsed_time": "0:15:50", "remaining_time": "0:38:10", "throughput": 5746.8, "total_tokens": 5462592} +{"current_steps": 11115, "total_steps": 37885, "loss": 0.3442, "lr": 1.7807552202231065e-06, "epoch": 1.4669394219348026, "percentage": 29.34, "elapsed_time": "0:15:50", "remaining_time": "0:38:10", "throughput": 5747.23, "total_tokens": 5464896} +{"current_steps": 11120, "total_steps": 37885, "loss": 0.1338, "lr": 1.7804672761406636e-06, "epoch": 1.4675993137125511, "percentage": 29.35, "elapsed_time": "0:15:51", "remaining_time": "0:38:09", "throughput": 5747.73, "total_tokens": 5467264} +{"current_steps": 11125, "total_steps": 37885, "loss": 0.0805, "lr": 1.7801791664096933e-06, "epoch": 1.4682592054902996, "percentage": 29.37, "elapsed_time": "0:15:51", "remaining_time": "0:38:08", "throughput": 5748.29, "total_tokens": 5469696} +{"current_steps": 11130, "total_steps": 37885, "loss": 0.0636, "lr": 1.7798908910913444e-06, "epoch": 1.468919097268048, "percentage": 29.38, "elapsed_time": "0:15:51", "remaining_time": "0:38:08", "throughput": 5749.21, "total_tokens": 5472512} +{"current_steps": 11135, "total_steps": 37885, "loss": 0.0954, "lr": 1.7796024502468015e-06, "epoch": 1.4695789890457964, "percentage": 29.39, "elapsed_time": "0:15:52", "remaining_time": "0:38:07", "throughput": 5750.01, "total_tokens": 5475200} +{"current_steps": 11140, "total_steps": 37885, "loss": 0.1199, "lr": 1.7793138439372839e-06, "epoch": 1.470238880823545, "percentage": 29.4, "elapsed_time": "0:15:52", "remaining_time": "0:38:06", "throughput": 5750.51, "total_tokens": 5477568} +{"current_steps": 11145, "total_steps": 37885, "loss": 0.0413, "lr": 1.7790250722240463e-06, "epoch": 1.4708987726012934, "percentage": 29.42, "elapsed_time": "0:15:52", "remaining_time": "0:38:06", "throughput": 5751.06, "total_tokens": 5480000} +{"current_steps": 11150, "total_steps": 37885, "loss": 0.0579, "lr": 1.7787361351683784e-06, "epoch": 1.471558664379042, "percentage": 29.43, "elapsed_time": "0:15:53", "remaining_time": "0:38:05", "throughput": 5751.67, "total_tokens": 5482496} +{"current_steps": 11155, "total_steps": 37885, "loss": 0.1602, "lr": 1.7784470328316048e-06, "epoch": 1.4722185561567902, "percentage": 29.44, "elapsed_time": "0:15:53", "remaining_time": "0:38:04", "throughput": 5752.23, "total_tokens": 5484928} +{"current_steps": 11160, "total_steps": 37885, "loss": 0.0452, "lr": 1.7781577652750858e-06, "epoch": 1.4728784479345387, "percentage": 29.46, "elapsed_time": "0:15:53", "remaining_time": "0:38:04", "throughput": 5752.74, "total_tokens": 5487296} +{"current_steps": 11165, "total_steps": 37885, "loss": 0.0005, "lr": 1.777868332560216e-06, "epoch": 1.4735383397122872, "percentage": 29.47, "elapsed_time": "0:15:54", "remaining_time": "0:38:03", "throughput": 5753.42, "total_tokens": 5489856} +{"current_steps": 11170, "total_steps": 37885, "loss": 0.0447, "lr": 1.7775787347484255e-06, "epoch": 1.4741982314900357, "percentage": 29.48, "elapsed_time": "0:15:54", "remaining_time": "0:38:02", "throughput": 5754.04, "total_tokens": 5492352} +{"current_steps": 11175, "total_steps": 37885, "loss": 0.0842, "lr": 1.7772889719011793e-06, "epoch": 1.4748581232677842, "percentage": 29.5, "elapsed_time": "0:15:54", "remaining_time": "0:38:02", "throughput": 5754.72, "total_tokens": 5494912} +{"current_steps": 11180, "total_steps": 37885, "loss": 0.0015, "lr": 1.7769990440799775e-06, "epoch": 1.4755180150455325, "percentage": 29.51, "elapsed_time": "0:15:55", "remaining_time": "0:38:01", "throughput": 5755.52, "total_tokens": 5497600} +{"current_steps": 11185, "total_steps": 37885, "loss": 0.0355, "lr": 1.7767089513463552e-06, "epoch": 1.476177906823281, "percentage": 29.52, "elapsed_time": "0:15:55", "remaining_time": "0:38:00", "throughput": 5756.38, "total_tokens": 5500352} +{"current_steps": 11190, "total_steps": 37885, "loss": 0.1102, "lr": 1.7764186937618826e-06, "epoch": 1.4768377986010295, "percentage": 29.54, "elapsed_time": "0:15:55", "remaining_time": "0:38:00", "throughput": 5756.74, "total_tokens": 5502592} +{"current_steps": 11195, "total_steps": 37885, "loss": 0.2163, "lr": 1.7761282713881645e-06, "epoch": 1.4774976903787778, "percentage": 29.55, "elapsed_time": "0:15:56", "remaining_time": "0:37:59", "throughput": 5757.56, "total_tokens": 5505280} +{"current_steps": 11200, "total_steps": 37885, "loss": 0.0617, "lr": 1.775837684286841e-06, "epoch": 1.4781575821565263, "percentage": 29.56, "elapsed_time": "0:15:56", "remaining_time": "0:37:58", "throughput": 5758.35, "total_tokens": 5507968} +{"current_steps": 11205, "total_steps": 37885, "loss": 0.2514, "lr": 1.7755469325195871e-06, "epoch": 1.4788174739342748, "percentage": 29.58, "elapsed_time": "0:15:56", "remaining_time": "0:37:58", "throughput": 5759.11, "total_tokens": 5510592} +{"current_steps": 11210, "total_steps": 37885, "loss": 0.054, "lr": 1.7752560161481131e-06, "epoch": 1.4794773657120233, "percentage": 29.59, "elapsed_time": "0:15:57", "remaining_time": "0:37:57", "throughput": 5759.52, "total_tokens": 5512896} +{"current_steps": 11215, "total_steps": 37885, "loss": 0.0291, "lr": 1.7749649352341636e-06, "epoch": 1.4801372574897718, "percentage": 29.6, "elapsed_time": "0:15:57", "remaining_time": "0:37:57", "throughput": 5760.32, "total_tokens": 5515584} +{"current_steps": 11220, "total_steps": 37885, "loss": 0.1157, "lr": 1.7746736898395182e-06, "epoch": 1.48079714926752, "percentage": 29.62, "elapsed_time": "0:15:57", "remaining_time": "0:37:56", "throughput": 5760.75, "total_tokens": 5517888} +{"current_steps": 11225, "total_steps": 37885, "loss": 0.1393, "lr": 1.7743822800259923e-06, "epoch": 1.4814570410452685, "percentage": 29.63, "elapsed_time": "0:15:58", "remaining_time": "0:37:55", "throughput": 5761.3, "total_tokens": 5520320} +{"current_steps": 11230, "total_steps": 37885, "loss": 0.1536, "lr": 1.7740907058554348e-06, "epoch": 1.482116932823017, "percentage": 29.64, "elapsed_time": "0:15:58", "remaining_time": "0:37:55", "throughput": 5761.73, "total_tokens": 5522624} +{"current_steps": 11235, "total_steps": 37885, "loss": 0.2235, "lr": 1.7737989673897307e-06, "epoch": 1.4827768246007655, "percentage": 29.66, "elapsed_time": "0:15:58", "remaining_time": "0:37:54", "throughput": 5762.1, "total_tokens": 5524864} +{"current_steps": 11240, "total_steps": 37885, "loss": 0.1008, "lr": 1.7735070646907988e-06, "epoch": 1.483436716378514, "percentage": 29.67, "elapsed_time": "0:15:59", "remaining_time": "0:37:53", "throughput": 5762.86, "total_tokens": 5527488} +{"current_steps": 11245, "total_steps": 37885, "loss": 0.0975, "lr": 1.773214997820594e-06, "epoch": 1.4840966081562623, "percentage": 29.68, "elapsed_time": "0:15:59", "remaining_time": "0:37:53", "throughput": 5763.35, "total_tokens": 5529856} +{"current_steps": 11250, "total_steps": 37885, "loss": 0.0849, "lr": 1.772922766841105e-06, "epoch": 1.4847564999340108, "percentage": 29.7, "elapsed_time": "0:15:59", "remaining_time": "0:37:52", "throughput": 5763.97, "total_tokens": 5532352} +{"current_steps": 11255, "total_steps": 37885, "loss": 0.0448, "lr": 1.772630371814356e-06, "epoch": 1.4854163917117593, "percentage": 29.71, "elapsed_time": "0:16:00", "remaining_time": "0:37:51", "throughput": 5764.71, "total_tokens": 5534976} +{"current_steps": 11260, "total_steps": 37885, "loss": 0.0766, "lr": 1.7723378128024056e-06, "epoch": 1.4860762834895076, "percentage": 29.72, "elapsed_time": "0:16:00", "remaining_time": "0:37:51", "throughput": 5765.27, "total_tokens": 5537408} +{"current_steps": 11265, "total_steps": 37885, "loss": 0.0378, "lr": 1.7720450898673468e-06, "epoch": 1.486736175267256, "percentage": 29.73, "elapsed_time": "0:16:00", "remaining_time": "0:37:50", "throughput": 5766.2, "total_tokens": 5540224} +{"current_steps": 11270, "total_steps": 37885, "loss": 0.2048, "lr": 1.7717522030713088e-06, "epoch": 1.4873960670450046, "percentage": 29.75, "elapsed_time": "0:16:01", "remaining_time": "0:37:49", "throughput": 5766.89, "total_tokens": 5542784} +{"current_steps": 11275, "total_steps": 37885, "loss": 0.0836, "lr": 1.771459152476454e-06, "epoch": 1.488055958822753, "percentage": 29.76, "elapsed_time": "0:16:01", "remaining_time": "0:37:49", "throughput": 5767.12, "total_tokens": 5544896} +{"current_steps": 11280, "total_steps": 37885, "loss": 0.0574, "lr": 1.7711659381449807e-06, "epoch": 1.4887158506005016, "percentage": 29.77, "elapsed_time": "0:16:01", "remaining_time": "0:37:48", "throughput": 5767.86, "total_tokens": 5547520} +{"current_steps": 11285, "total_steps": 37885, "loss": 0.1081, "lr": 1.7708725601391214e-06, "epoch": 1.4893757423782499, "percentage": 29.79, "elapsed_time": "0:16:02", "remaining_time": "0:37:47", "throughput": 5768.41, "total_tokens": 5549952} +{"current_steps": 11290, "total_steps": 37885, "loss": 0.0337, "lr": 1.7705790185211433e-06, "epoch": 1.4900356341559984, "percentage": 29.8, "elapsed_time": "0:16:02", "remaining_time": "0:37:47", "throughput": 5769.32, "total_tokens": 5552768} +{"current_steps": 11295, "total_steps": 37885, "loss": 0.0916, "lr": 1.770285313353349e-06, "epoch": 1.4906955259337469, "percentage": 29.81, "elapsed_time": "0:16:02", "remaining_time": "0:37:46", "throughput": 5770.06, "total_tokens": 5555392} +{"current_steps": 11300, "total_steps": 37885, "loss": 0.1468, "lr": 1.7699914446980745e-06, "epoch": 1.4913554177114954, "percentage": 29.83, "elapsed_time": "0:16:03", "remaining_time": "0:37:45", "throughput": 5770.57, "total_tokens": 5557760} +{"current_steps": 11305, "total_steps": 37885, "loss": 0.0169, "lr": 1.7696974126176917e-06, "epoch": 1.4920153094892439, "percentage": 29.84, "elapsed_time": "0:16:03", "remaining_time": "0:37:45", "throughput": 5771.14, "total_tokens": 5560192} +{"current_steps": 11310, "total_steps": 37885, "loss": 0.042, "lr": 1.769403217174607e-06, "epoch": 1.4926752012669922, "percentage": 29.85, "elapsed_time": "0:16:03", "remaining_time": "0:37:44", "throughput": 5771.57, "total_tokens": 5562496} +{"current_steps": 11315, "total_steps": 37885, "loss": 0.128, "lr": 1.7691088584312608e-06, "epoch": 1.4933350930447407, "percentage": 29.87, "elapsed_time": "0:16:04", "remaining_time": "0:37:43", "throughput": 5772.19, "total_tokens": 5564992} +{"current_steps": 11320, "total_steps": 37885, "loss": 0.0005, "lr": 1.7688143364501292e-06, "epoch": 1.4939949848224892, "percentage": 29.88, "elapsed_time": "0:16:04", "remaining_time": "0:37:43", "throughput": 5772.93, "total_tokens": 5567616} +{"current_steps": 11325, "total_steps": 37885, "loss": 0.001, "lr": 1.7685196512937217e-06, "epoch": 1.4946548766002374, "percentage": 29.89, "elapsed_time": "0:16:04", "remaining_time": "0:37:42", "throughput": 5773.42, "total_tokens": 5569984} +{"current_steps": 11330, "total_steps": 37885, "loss": 0.1185, "lr": 1.7682248030245836e-06, "epoch": 1.495314768377986, "percentage": 29.91, "elapsed_time": "0:16:05", "remaining_time": "0:37:41", "throughput": 5773.73, "total_tokens": 5572160} +{"current_steps": 11335, "total_steps": 37885, "loss": 0.0006, "lr": 1.7679297917052939e-06, "epoch": 1.4959746601557344, "percentage": 29.92, "elapsed_time": "0:16:05", "remaining_time": "0:37:41", "throughput": 5774.1, "total_tokens": 5574400} +{"current_steps": 11340, "total_steps": 37885, "loss": 0.0005, "lr": 1.7676346173984669e-06, "epoch": 1.496634551933483, "percentage": 29.93, "elapsed_time": "0:16:05", "remaining_time": "0:37:40", "throughput": 5774.89, "total_tokens": 5577088} +{"current_steps": 11345, "total_steps": 37885, "loss": 0.0004, "lr": 1.7673392801667513e-06, "epoch": 1.4972944437112314, "percentage": 29.95, "elapsed_time": "0:16:06", "remaining_time": "0:37:39", "throughput": 5775.51, "total_tokens": 5579584} +{"current_steps": 11350, "total_steps": 37885, "loss": 0.2305, "lr": 1.7670437800728298e-06, "epoch": 1.4979543354889797, "percentage": 29.96, "elapsed_time": "0:16:06", "remaining_time": "0:37:39", "throughput": 5775.99, "total_tokens": 5581952} +{"current_steps": 11355, "total_steps": 37885, "loss": 0.0002, "lr": 1.7667481171794205e-06, "epoch": 1.4986142272667282, "percentage": 29.97, "elapsed_time": "0:16:06", "remaining_time": "0:37:38", "throughput": 5776.63, "total_tokens": 5584448} +{"current_steps": 11360, "total_steps": 37885, "loss": 0.0414, "lr": 1.7664522915492759e-06, "epoch": 1.4992741190444767, "percentage": 29.99, "elapsed_time": "0:16:07", "remaining_time": "0:37:38", "throughput": 5777.3, "total_tokens": 5587008} +{"current_steps": 11365, "total_steps": 37885, "loss": 0.0593, "lr": 1.7661563032451827e-06, "epoch": 1.4999340108222252, "percentage": 30.0, "elapsed_time": "0:16:07", "remaining_time": "0:37:37", "throughput": 5777.69, "total_tokens": 5589312} +{"current_steps": 11370, "total_steps": 37885, "loss": 0.1073, "lr": 1.7658601523299619e-06, "epoch": 1.5005939025999737, "percentage": 30.01, "elapsed_time": "0:16:07", "remaining_time": "0:37:36", "throughput": 5778.15, "total_tokens": 5591680} +{"current_steps": 11370, "total_steps": 37885, "eval_loss": 0.12565796077251434, "epoch": 1.5005939025999737, "percentage": 30.01, "elapsed_time": "0:16:15", "remaining_time": "0:37:55", "throughput": 5731.02, "total_tokens": 5591680} +{"current_steps": 11375, "total_steps": 37885, "loss": 0.0011, "lr": 1.7655638388664698e-06, "epoch": 1.501253794377722, "percentage": 30.03, "elapsed_time": "0:16:53", "remaining_time": "0:39:22", "throughput": 5518.42, "total_tokens": 5594176} +{"current_steps": 11380, "total_steps": 37885, "loss": 0.0044, "lr": 1.765267362917597e-06, "epoch": 1.5019136861554705, "percentage": 30.04, "elapsed_time": "0:16:54", "remaining_time": "0:39:21", "throughput": 5519.39, "total_tokens": 5597056} +{"current_steps": 11385, "total_steps": 37885, "loss": 0.0555, "lr": 1.7649707245462678e-06, "epoch": 1.502573577933219, "percentage": 30.05, "elapsed_time": "0:16:54", "remaining_time": "0:39:21", "throughput": 5519.94, "total_tokens": 5599488} +{"current_steps": 11390, "total_steps": 37885, "loss": 0.0716, "lr": 1.7646739238154416e-06, "epoch": 1.5032334697109673, "percentage": 30.06, "elapsed_time": "0:16:54", "remaining_time": "0:39:20", "throughput": 5520.45, "total_tokens": 5601856} +{"current_steps": 11395, "total_steps": 37885, "loss": 0.1088, "lr": 1.7643769607881126e-06, "epoch": 1.503893361488716, "percentage": 30.08, "elapsed_time": "0:16:55", "remaining_time": "0:39:19", "throughput": 5521.42, "total_tokens": 5604736} +{"current_steps": 11400, "total_steps": 37885, "loss": 0.0004, "lr": 1.7640798355273087e-06, "epoch": 1.5045532532664643, "percentage": 30.09, "elapsed_time": "0:16:55", "remaining_time": "0:39:19", "throughput": 5521.99, "total_tokens": 5607168} +{"current_steps": 11405, "total_steps": 37885, "loss": 0.0018, "lr": 1.7637825480960929e-06, "epoch": 1.5052131450442128, "percentage": 30.1, "elapsed_time": "0:16:55", "remaining_time": "0:39:18", "throughput": 5522.57, "total_tokens": 5609600} +{"current_steps": 11410, "total_steps": 37885, "loss": 0.2186, "lr": 1.7634850985575623e-06, "epoch": 1.5058730368219613, "percentage": 30.12, "elapsed_time": "0:16:56", "remaining_time": "0:39:17", "throughput": 5523.14, "total_tokens": 5612032} +{"current_steps": 11415, "total_steps": 37885, "loss": 0.0788, "lr": 1.7631874869748477e-06, "epoch": 1.5065329285997096, "percentage": 30.13, "elapsed_time": "0:16:56", "remaining_time": "0:39:16", "throughput": 5523.88, "total_tokens": 5614656} +{"current_steps": 11420, "total_steps": 37885, "loss": 0.128, "lr": 1.7628897134111163e-06, "epoch": 1.507192820377458, "percentage": 30.14, "elapsed_time": "0:16:56", "remaining_time": "0:39:16", "throughput": 5524.15, "total_tokens": 5616768} +{"current_steps": 11425, "total_steps": 37885, "loss": 0.1947, "lr": 1.762591777929567e-06, "epoch": 1.5078527121552066, "percentage": 30.16, "elapsed_time": "0:16:57", "remaining_time": "0:39:15", "throughput": 5524.57, "total_tokens": 5619008} +{"current_steps": 11430, "total_steps": 37885, "loss": 0.1306, "lr": 1.7622936805934355e-06, "epoch": 1.5085126039329548, "percentage": 30.17, "elapsed_time": "0:16:57", "remaining_time": "0:39:14", "throughput": 5525.16, "total_tokens": 5621440} +{"current_steps": 11435, "total_steps": 37885, "loss": 0.0579, "lr": 1.7619954214659901e-06, "epoch": 1.5091724957107036, "percentage": 30.18, "elapsed_time": "0:16:57", "remaining_time": "0:39:14", "throughput": 5525.76, "total_tokens": 5623872} +{"current_steps": 11440, "total_steps": 37885, "loss": 0.0584, "lr": 1.7616970006105347e-06, "epoch": 1.5098323874884518, "percentage": 30.2, "elapsed_time": "0:16:58", "remaining_time": "0:39:13", "throughput": 5526.3, "total_tokens": 5626240} +{"current_steps": 11445, "total_steps": 37885, "loss": 0.1526, "lr": 1.7613984180904065e-06, "epoch": 1.5104922792662003, "percentage": 30.21, "elapsed_time": "0:16:58", "remaining_time": "0:39:12", "throughput": 5526.78, "total_tokens": 5628544} +{"current_steps": 11450, "total_steps": 37885, "loss": 0.116, "lr": 1.7610996739689779e-06, "epoch": 1.5111521710439488, "percentage": 30.22, "elapsed_time": "0:16:58", "remaining_time": "0:39:11", "throughput": 5527.33, "total_tokens": 5630912} +{"current_steps": 11455, "total_steps": 37885, "loss": 0.1417, "lr": 1.7608007683096547e-06, "epoch": 1.5118120628216971, "percentage": 30.24, "elapsed_time": "0:16:59", "remaining_time": "0:39:11", "throughput": 5528.03, "total_tokens": 5633472} +{"current_steps": 11460, "total_steps": 37885, "loss": 0.1697, "lr": 1.7605017011758778e-06, "epoch": 1.5124719545994458, "percentage": 30.25, "elapsed_time": "0:16:59", "remaining_time": "0:39:10", "throughput": 5528.45, "total_tokens": 5635712} +{"current_steps": 11465, "total_steps": 37885, "loss": 0.0017, "lr": 1.7602024726311219e-06, "epoch": 1.5131318463771941, "percentage": 30.26, "elapsed_time": "0:16:59", "remaining_time": "0:39:09", "throughput": 5529.09, "total_tokens": 5638208} +{"current_steps": 11470, "total_steps": 37885, "loss": 0.0026, "lr": 1.7599030827388963e-06, "epoch": 1.5137917381549426, "percentage": 30.28, "elapsed_time": "0:17:00", "remaining_time": "0:39:09", "throughput": 5529.83, "total_tokens": 5640832} +{"current_steps": 11475, "total_steps": 37885, "loss": 0.0513, "lr": 1.7596035315627442e-06, "epoch": 1.5144516299326911, "percentage": 30.29, "elapsed_time": "0:17:00", "remaining_time": "0:39:08", "throughput": 5530.35, "total_tokens": 5643200} +{"current_steps": 11480, "total_steps": 37885, "loss": 0.0494, "lr": 1.7593038191662427e-06, "epoch": 1.5151115217104394, "percentage": 30.3, "elapsed_time": "0:17:00", "remaining_time": "0:39:07", "throughput": 5531.17, "total_tokens": 5645888} +{"current_steps": 11485, "total_steps": 37885, "loss": 0.0006, "lr": 1.7590039456130046e-06, "epoch": 1.515771413488188, "percentage": 30.32, "elapsed_time": "0:17:01", "remaining_time": "0:39:07", "throughput": 5531.59, "total_tokens": 5648128} +{"current_steps": 11490, "total_steps": 37885, "loss": 0.0801, "lr": 1.758703910966675e-06, "epoch": 1.5164313052659364, "percentage": 30.33, "elapsed_time": "0:17:01", "remaining_time": "0:39:06", "throughput": 5532.25, "total_tokens": 5650624} +{"current_steps": 11495, "total_steps": 37885, "loss": 0.1932, "lr": 1.7584037152909344e-06, "epoch": 1.5170911970436847, "percentage": 30.34, "elapsed_time": "0:17:01", "remaining_time": "0:39:05", "throughput": 5532.83, "total_tokens": 5653056} +{"current_steps": 11500, "total_steps": 37885, "loss": 0.0716, "lr": 1.7581033586494973e-06, "epoch": 1.5177510888214334, "percentage": 30.36, "elapsed_time": "0:17:02", "remaining_time": "0:39:04", "throughput": 5533.48, "total_tokens": 5655552} +{"current_steps": 11505, "total_steps": 37885, "loss": 0.1644, "lr": 1.757802841106112e-06, "epoch": 1.5184109805991817, "percentage": 30.37, "elapsed_time": "0:17:02", "remaining_time": "0:39:04", "throughput": 5534.19, "total_tokens": 5658112} +{"current_steps": 11510, "total_steps": 37885, "loss": 0.0823, "lr": 1.7575021627245612e-06, "epoch": 1.5190708723769302, "percentage": 30.38, "elapsed_time": "0:17:02", "remaining_time": "0:39:03", "throughput": 5534.73, "total_tokens": 5660480} +{"current_steps": 11515, "total_steps": 37885, "loss": 0.1695, "lr": 1.7572013235686618e-06, "epoch": 1.5197307641546787, "percentage": 30.39, "elapsed_time": "0:17:03", "remaining_time": "0:39:02", "throughput": 5535.27, "total_tokens": 5662848} +{"current_steps": 11520, "total_steps": 37885, "loss": 0.0332, "lr": 1.7569003237022647e-06, "epoch": 1.520390655932427, "percentage": 30.41, "elapsed_time": "0:17:03", "remaining_time": "0:39:02", "throughput": 5536.14, "total_tokens": 5665600} +{"current_steps": 11525, "total_steps": 37885, "loss": 0.0245, "lr": 1.756599163189255e-06, "epoch": 1.5210505477101757, "percentage": 30.42, "elapsed_time": "0:17:03", "remaining_time": "0:39:01", "throughput": 5536.49, "total_tokens": 5667776} +{"current_steps": 11530, "total_steps": 37885, "loss": 0.0693, "lr": 1.7562978420935516e-06, "epoch": 1.521710439487924, "percentage": 30.43, "elapsed_time": "0:17:04", "remaining_time": "0:39:00", "throughput": 5537.24, "total_tokens": 5670400} +{"current_steps": 11535, "total_steps": 37885, "loss": 0.0025, "lr": 1.755996360479108e-06, "epoch": 1.5223703312656724, "percentage": 30.45, "elapsed_time": "0:17:04", "remaining_time": "0:39:00", "throughput": 5538.12, "total_tokens": 5673152} +{"current_steps": 11540, "total_steps": 37885, "loss": 0.0894, "lr": 1.7556947184099115e-06, "epoch": 1.523030223043421, "percentage": 30.46, "elapsed_time": "0:17:04", "remaining_time": "0:38:59", "throughput": 5538.74, "total_tokens": 5675648} +{"current_steps": 11545, "total_steps": 37885, "loss": 0.0006, "lr": 1.7553929159499832e-06, "epoch": 1.5236901148211692, "percentage": 30.47, "elapsed_time": "0:17:05", "remaining_time": "0:38:58", "throughput": 5539.41, "total_tokens": 5678144} +{"current_steps": 11550, "total_steps": 37885, "loss": 0.0004, "lr": 1.755090953163379e-06, "epoch": 1.5243500065989177, "percentage": 30.49, "elapsed_time": "0:17:05", "remaining_time": "0:38:57", "throughput": 5539.87, "total_tokens": 5680448} +{"current_steps": 11555, "total_steps": 37885, "loss": 0.0007, "lr": 1.754788830114187e-06, "epoch": 1.5250098983766662, "percentage": 30.5, "elapsed_time": "0:17:05", "remaining_time": "0:38:57", "throughput": 5540.56, "total_tokens": 5683008} +{"current_steps": 11560, "total_steps": 37885, "loss": 0.1216, "lr": 1.7544865468665325e-06, "epoch": 1.5256697901544147, "percentage": 30.51, "elapsed_time": "0:17:06", "remaining_time": "0:38:56", "throughput": 5541.3, "total_tokens": 5685632} +{"current_steps": 11565, "total_steps": 37885, "loss": 0.0914, "lr": 1.7541841034845714e-06, "epoch": 1.5263296819321632, "percentage": 30.53, "elapsed_time": "0:17:06", "remaining_time": "0:38:55", "throughput": 5541.76, "total_tokens": 5687936} +{"current_steps": 11570, "total_steps": 37885, "loss": 0.0879, "lr": 1.753881500032496e-06, "epoch": 1.5269895737099115, "percentage": 30.54, "elapsed_time": "0:17:06", "remaining_time": "0:38:55", "throughput": 5542.53, "total_tokens": 5690560} +{"current_steps": 11575, "total_steps": 37885, "loss": 0.0659, "lr": 1.7535787365745314e-06, "epoch": 1.52764946548766, "percentage": 30.55, "elapsed_time": "0:17:07", "remaining_time": "0:38:54", "throughput": 5543.03, "total_tokens": 5692928} +{"current_steps": 11580, "total_steps": 37885, "loss": 0.1278, "lr": 1.7532758131749367e-06, "epoch": 1.5283093572654085, "percentage": 30.57, "elapsed_time": "0:17:07", "remaining_time": "0:38:53", "throughput": 5543.51, "total_tokens": 5695232} +{"current_steps": 11585, "total_steps": 37885, "loss": 0.0222, "lr": 1.7529727298980058e-06, "epoch": 1.5289692490431568, "percentage": 30.58, "elapsed_time": "0:17:07", "remaining_time": "0:38:53", "throughput": 5544.28, "total_tokens": 5697856} +{"current_steps": 11590, "total_steps": 37885, "loss": 0.1518, "lr": 1.7526694868080654e-06, "epoch": 1.5296291408209055, "percentage": 30.59, "elapsed_time": "0:17:08", "remaining_time": "0:38:52", "throughput": 5545.1, "total_tokens": 5700544} +{"current_steps": 11595, "total_steps": 37885, "loss": 0.0011, "lr": 1.752366083969477e-06, "epoch": 1.5302890325986538, "percentage": 30.61, "elapsed_time": "0:17:08", "remaining_time": "0:38:51", "throughput": 5545.71, "total_tokens": 5702976} +{"current_steps": 11600, "total_steps": 37885, "loss": 0.0432, "lr": 1.7520625214466352e-06, "epoch": 1.5309489243764023, "percentage": 30.62, "elapsed_time": "0:17:08", "remaining_time": "0:38:50", "throughput": 5546.47, "total_tokens": 5705600} +{"current_steps": 11605, "total_steps": 37885, "loss": 0.157, "lr": 1.7517587993039693e-06, "epoch": 1.5316088161541508, "percentage": 30.63, "elapsed_time": "0:17:09", "remaining_time": "0:38:50", "throughput": 5547.01, "total_tokens": 5707968} +{"current_steps": 11610, "total_steps": 37885, "loss": 0.1714, "lr": 1.751454917605942e-06, "epoch": 1.532268707931899, "percentage": 30.65, "elapsed_time": "0:17:09", "remaining_time": "0:38:49", "throughput": 5547.83, "total_tokens": 5710656} +{"current_steps": 11615, "total_steps": 37885, "loss": 0.1965, "lr": 1.7511508764170502e-06, "epoch": 1.5329285997096476, "percentage": 30.66, "elapsed_time": "0:17:09", "remaining_time": "0:38:48", "throughput": 5548.29, "total_tokens": 5712960} +{"current_steps": 11620, "total_steps": 37885, "loss": 0.1463, "lr": 1.7508466758018243e-06, "epoch": 1.533588491487396, "percentage": 30.67, "elapsed_time": "0:17:10", "remaining_time": "0:38:48", "throughput": 5548.94, "total_tokens": 5715456} +{"current_steps": 11625, "total_steps": 37885, "loss": 0.1403, "lr": 1.7505423158248285e-06, "epoch": 1.5342483832651446, "percentage": 30.68, "elapsed_time": "0:17:10", "remaining_time": "0:38:47", "throughput": 5549.69, "total_tokens": 5718080} +{"current_steps": 11630, "total_steps": 37885, "loss": 0.1222, "lr": 1.750237796550661e-06, "epoch": 1.534908275042893, "percentage": 30.7, "elapsed_time": "0:17:10", "remaining_time": "0:38:46", "throughput": 5550.22, "total_tokens": 5720448} +{"current_steps": 11635, "total_steps": 37885, "loss": 0.004, "lr": 1.7499331180439545e-06, "epoch": 1.5355681668206413, "percentage": 30.71, "elapsed_time": "0:17:10", "remaining_time": "0:38:46", "throughput": 5550.74, "total_tokens": 5722816} +{"current_steps": 11640, "total_steps": 37885, "loss": 0.0761, "lr": 1.749628280369374e-06, "epoch": 1.5362280585983898, "percentage": 30.72, "elapsed_time": "0:17:11", "remaining_time": "0:38:45", "throughput": 5551.26, "total_tokens": 5725184} +{"current_steps": 11645, "total_steps": 37885, "loss": 0.0645, "lr": 1.7493232835916195e-06, "epoch": 1.5368879503761383, "percentage": 30.74, "elapsed_time": "0:17:11", "remaining_time": "0:38:44", "throughput": 5552.07, "total_tokens": 5727872} +{"current_steps": 11650, "total_steps": 37885, "loss": 0.0513, "lr": 1.7490181277754238e-06, "epoch": 1.5375478421538866, "percentage": 30.75, "elapsed_time": "0:17:12", "remaining_time": "0:38:44", "throughput": 5552.85, "total_tokens": 5730560} +{"current_steps": 11655, "total_steps": 37885, "loss": 0.0013, "lr": 1.748712812985555e-06, "epoch": 1.5382077339316353, "percentage": 30.76, "elapsed_time": "0:17:12", "remaining_time": "0:38:43", "throughput": 5553.49, "total_tokens": 5733056} +{"current_steps": 11660, "total_steps": 37885, "loss": 0.0553, "lr": 1.7484073392868133e-06, "epoch": 1.5388676257093836, "percentage": 30.78, "elapsed_time": "0:17:12", "remaining_time": "0:38:42", "throughput": 5554.31, "total_tokens": 5735744} +{"current_steps": 11665, "total_steps": 37885, "loss": 0.2724, "lr": 1.7481017067440332e-06, "epoch": 1.5395275174871321, "percentage": 30.79, "elapsed_time": "0:17:13", "remaining_time": "0:38:41", "throughput": 5554.8, "total_tokens": 5738112} +{"current_steps": 11670, "total_steps": 37885, "loss": 0.0778, "lr": 1.7477959154220834e-06, "epoch": 1.5401874092648806, "percentage": 30.8, "elapsed_time": "0:17:13", "remaining_time": "0:38:41", "throughput": 5555.26, "total_tokens": 5740480} +{"current_steps": 11675, "total_steps": 37885, "loss": 0.1881, "lr": 1.7474899653858651e-06, "epoch": 1.540847301042629, "percentage": 30.82, "elapsed_time": "0:17:13", "remaining_time": "0:38:40", "throughput": 5555.59, "total_tokens": 5742720} +{"current_steps": 11680, "total_steps": 37885, "loss": 0.0014, "lr": 1.7471838567003153e-06, "epoch": 1.5415071928203774, "percentage": 30.83, "elapsed_time": "0:17:14", "remaining_time": "0:38:39", "throughput": 5556.1, "total_tokens": 5745088} +{"current_steps": 11685, "total_steps": 37885, "loss": 0.0763, "lr": 1.746877589430402e-06, "epoch": 1.542167084598126, "percentage": 30.84, "elapsed_time": "0:17:14", "remaining_time": "0:38:39", "throughput": 5556.45, "total_tokens": 5747328} +{"current_steps": 11690, "total_steps": 37885, "loss": 0.1275, "lr": 1.7465711636411288e-06, "epoch": 1.5428269763758744, "percentage": 30.86, "elapsed_time": "0:17:14", "remaining_time": "0:38:38", "throughput": 5557.17, "total_tokens": 5749952} +{"current_steps": 11695, "total_steps": 37885, "loss": 0.2444, "lr": 1.746264579397533e-06, "epoch": 1.543486868153623, "percentage": 30.87, "elapsed_time": "0:17:15", "remaining_time": "0:38:37", "throughput": 5557.82, "total_tokens": 5752512} +{"current_steps": 11700, "total_steps": 37885, "loss": 0.0617, "lr": 1.7459578367646836e-06, "epoch": 1.5441467599313712, "percentage": 30.88, "elapsed_time": "0:17:15", "remaining_time": "0:38:37", "throughput": 5558.54, "total_tokens": 5755136} +{"current_steps": 11705, "total_steps": 37885, "loss": 0.0409, "lr": 1.7456509358076854e-06, "epoch": 1.5448066517091197, "percentage": 30.9, "elapsed_time": "0:17:15", "remaining_time": "0:38:36", "throughput": 5559.12, "total_tokens": 5757568} +{"current_steps": 11710, "total_steps": 37885, "loss": 0.0776, "lr": 1.7453438765916758e-06, "epoch": 1.5454665434868682, "percentage": 30.91, "elapsed_time": "0:17:16", "remaining_time": "0:38:35", "throughput": 5559.67, "total_tokens": 5760000} +{"current_steps": 11715, "total_steps": 37885, "loss": 0.0011, "lr": 1.7450366591818255e-06, "epoch": 1.5461264352646165, "percentage": 30.92, "elapsed_time": "0:17:16", "remaining_time": "0:38:35", "throughput": 5560.28, "total_tokens": 5762496} +{"current_steps": 11720, "total_steps": 37885, "loss": 0.0578, "lr": 1.7447292836433393e-06, "epoch": 1.5467863270423652, "percentage": 30.94, "elapsed_time": "0:17:16", "remaining_time": "0:38:34", "throughput": 5560.87, "total_tokens": 5764992} +{"current_steps": 11725, "total_steps": 37885, "loss": 0.2353, "lr": 1.744421750041456e-06, "epoch": 1.5474462188201135, "percentage": 30.95, "elapsed_time": "0:17:17", "remaining_time": "0:38:33", "throughput": 5561.53, "total_tokens": 5767552} +{"current_steps": 11730, "total_steps": 37885, "loss": 0.0704, "lr": 1.7441140584414466e-06, "epoch": 1.548106110597862, "percentage": 30.96, "elapsed_time": "0:17:17", "remaining_time": "0:38:33", "throughput": 5562.56, "total_tokens": 5770496} +{"current_steps": 11735, "total_steps": 37885, "loss": 0.0033, "lr": 1.7438062089086167e-06, "epoch": 1.5487660023756105, "percentage": 30.98, "elapsed_time": "0:17:17", "remaining_time": "0:38:32", "throughput": 5563.04, "total_tokens": 5772864} +{"current_steps": 11740, "total_steps": 37885, "loss": 0.13, "lr": 1.7434982015083056e-06, "epoch": 1.5494258941533587, "percentage": 30.99, "elapsed_time": "0:17:18", "remaining_time": "0:38:31", "throughput": 5563.66, "total_tokens": 5775360} +{"current_steps": 11745, "total_steps": 37885, "loss": 0.0007, "lr": 1.743190036305885e-06, "epoch": 1.5500857859311075, "percentage": 31.0, "elapsed_time": "0:17:18", "remaining_time": "0:38:31", "throughput": 5564.15, "total_tokens": 5777728} +{"current_steps": 11750, "total_steps": 37885, "loss": 0.138, "lr": 1.7428817133667607e-06, "epoch": 1.5507456777088557, "percentage": 31.01, "elapsed_time": "0:17:18", "remaining_time": "0:38:30", "throughput": 5564.72, "total_tokens": 5780160} +{"current_steps": 11755, "total_steps": 37885, "loss": 0.0649, "lr": 1.7425732327563724e-06, "epoch": 1.5514055694866042, "percentage": 31.03, "elapsed_time": "0:17:19", "remaining_time": "0:38:29", "throughput": 5565.34, "total_tokens": 5782656} +{"current_steps": 11760, "total_steps": 37885, "loss": 0.0009, "lr": 1.742264594540193e-06, "epoch": 1.5520654612643527, "percentage": 31.04, "elapsed_time": "0:17:19", "remaining_time": "0:38:28", "throughput": 5565.67, "total_tokens": 5784832} +{"current_steps": 11765, "total_steps": 37885, "loss": 0.0474, "lr": 1.7419557987837282e-06, "epoch": 1.552725353042101, "percentage": 31.05, "elapsed_time": "0:17:19", "remaining_time": "0:38:28", "throughput": 5566.33, "total_tokens": 5787392} +{"current_steps": 11770, "total_steps": 37885, "loss": 0.0006, "lr": 1.7416468455525179e-06, "epoch": 1.5533852448198495, "percentage": 31.07, "elapsed_time": "0:17:20", "remaining_time": "0:38:27", "throughput": 5567.12, "total_tokens": 5790080} +{"current_steps": 11775, "total_steps": 37885, "loss": 0.1212, "lr": 1.7413377349121353e-06, "epoch": 1.554045136597598, "percentage": 31.08, "elapsed_time": "0:17:20", "remaining_time": "0:38:26", "throughput": 5567.72, "total_tokens": 5792576} +{"current_steps": 11780, "total_steps": 37885, "loss": 0.0603, "lr": 1.7410284669281868e-06, "epoch": 1.5547050283753463, "percentage": 31.09, "elapsed_time": "0:17:20", "remaining_time": "0:38:26", "throughput": 5568.37, "total_tokens": 5795136} +{"current_steps": 11785, "total_steps": 37885, "loss": 0.1844, "lr": 1.7407190416663124e-06, "epoch": 1.555364920153095, "percentage": 31.11, "elapsed_time": "0:17:21", "remaining_time": "0:38:25", "throughput": 5568.92, "total_tokens": 5797568} +{"current_steps": 11790, "total_steps": 37885, "loss": 0.068, "lr": 1.7404094591921852e-06, "epoch": 1.5560248119308433, "percentage": 31.12, "elapsed_time": "0:17:21", "remaining_time": "0:38:24", "throughput": 5569.31, "total_tokens": 5799808} +{"current_steps": 11795, "total_steps": 37885, "loss": 0.0521, "lr": 1.740099719571512e-06, "epoch": 1.5566847037085918, "percentage": 31.13, "elapsed_time": "0:17:21", "remaining_time": "0:38:24", "throughput": 5569.89, "total_tokens": 5802240} +{"current_steps": 11800, "total_steps": 37885, "loss": 0.0007, "lr": 1.7397898228700324e-06, "epoch": 1.5573445954863403, "percentage": 31.15, "elapsed_time": "0:17:22", "remaining_time": "0:38:23", "throughput": 5570.26, "total_tokens": 5804480} +{"current_steps": 11805, "total_steps": 37885, "loss": 0.1066, "lr": 1.7394797691535203e-06, "epoch": 1.5580044872640886, "percentage": 31.16, "elapsed_time": "0:17:22", "remaining_time": "0:38:22", "throughput": 5570.82, "total_tokens": 5806912} +{"current_steps": 11810, "total_steps": 37885, "loss": 0.0599, "lr": 1.739169558487782e-06, "epoch": 1.5586643790418373, "percentage": 31.17, "elapsed_time": "0:17:22", "remaining_time": "0:38:22", "throughput": 5571.19, "total_tokens": 5809152} +{"current_steps": 11815, "total_steps": 37885, "loss": 0.033, "lr": 1.7388591909386575e-06, "epoch": 1.5593242708195856, "percentage": 31.19, "elapsed_time": "0:17:23", "remaining_time": "0:38:21", "throughput": 5571.85, "total_tokens": 5811712} +{"current_steps": 11820, "total_steps": 37885, "loss": 0.1292, "lr": 1.7385486665720203e-06, "epoch": 1.559984162597334, "percentage": 31.2, "elapsed_time": "0:17:23", "remaining_time": "0:38:20", "throughput": 5572.39, "total_tokens": 5814144} +{"current_steps": 11825, "total_steps": 37885, "loss": 0.0006, "lr": 1.7382379854537767e-06, "epoch": 1.5606440543750826, "percentage": 31.21, "elapsed_time": "0:17:23", "remaining_time": "0:38:20", "throughput": 5572.88, "total_tokens": 5816512} +{"current_steps": 11830, "total_steps": 37885, "loss": 0.17, "lr": 1.7379271476498666e-06, "epoch": 1.5613039461528309, "percentage": 31.23, "elapsed_time": "0:17:24", "remaining_time": "0:38:19", "throughput": 5573.52, "total_tokens": 5819072} +{"current_steps": 11835, "total_steps": 37885, "loss": 0.3636, "lr": 1.737616153226263e-06, "epoch": 1.5619638379305794, "percentage": 31.24, "elapsed_time": "0:17:24", "remaining_time": "0:38:18", "throughput": 5574.16, "total_tokens": 5821632} +{"current_steps": 11840, "total_steps": 37885, "loss": 0.0012, "lr": 1.7373050022489722e-06, "epoch": 1.5626237297083279, "percentage": 31.25, "elapsed_time": "0:17:24", "remaining_time": "0:38:18", "throughput": 5574.64, "total_tokens": 5824000} +{"current_steps": 11845, "total_steps": 37885, "loss": 0.1184, "lr": 1.736993694784034e-06, "epoch": 1.5632836214860761, "percentage": 31.27, "elapsed_time": "0:17:25", "remaining_time": "0:38:17", "throughput": 5575.07, "total_tokens": 5826304} +{"current_steps": 11850, "total_steps": 37885, "loss": 0.0028, "lr": 1.736682230897521e-06, "epoch": 1.5639435132638249, "percentage": 31.28, "elapsed_time": "0:17:25", "remaining_time": "0:38:16", "throughput": 5575.48, "total_tokens": 5828608} +{"current_steps": 11855, "total_steps": 37885, "loss": 0.0607, "lr": 1.7363706106555388e-06, "epoch": 1.5646034050415731, "percentage": 31.29, "elapsed_time": "0:17:25", "remaining_time": "0:38:16", "throughput": 5576.18, "total_tokens": 5831232} +{"current_steps": 11860, "total_steps": 37885, "loss": 0.1293, "lr": 1.7360588341242273e-06, "epoch": 1.5652632968193216, "percentage": 31.31, "elapsed_time": "0:17:26", "remaining_time": "0:38:15", "throughput": 5576.73, "total_tokens": 5833664} +{"current_steps": 11865, "total_steps": 37885, "loss": 0.047, "lr": 1.7357469013697582e-06, "epoch": 1.5659231885970701, "percentage": 31.32, "elapsed_time": "0:17:26", "remaining_time": "0:38:14", "throughput": 5577.15, "total_tokens": 5835968} +{"current_steps": 11870, "total_steps": 37885, "loss": 0.05, "lr": 1.735434812458337e-06, "epoch": 1.5665830803748184, "percentage": 31.33, "elapsed_time": "0:17:26", "remaining_time": "0:38:14", "throughput": 5577.75, "total_tokens": 5838464} +{"current_steps": 11875, "total_steps": 37885, "loss": 0.0951, "lr": 1.7351225674562023e-06, "epoch": 1.5672429721525671, "percentage": 31.34, "elapsed_time": "0:17:27", "remaining_time": "0:38:13", "throughput": 5578.17, "total_tokens": 5840768} +{"current_steps": 11880, "total_steps": 37885, "loss": 0.0417, "lr": 1.7348101664296265e-06, "epoch": 1.5679028639303154, "percentage": 31.36, "elapsed_time": "0:17:27", "remaining_time": "0:38:12", "throughput": 5578.84, "total_tokens": 5843328} +{"current_steps": 11885, "total_steps": 37885, "loss": 0.0526, "lr": 1.7344976094449138e-06, "epoch": 1.568562755708064, "percentage": 31.37, "elapsed_time": "0:17:27", "remaining_time": "0:38:12", "throughput": 5579.44, "total_tokens": 5845824} +{"current_steps": 11890, "total_steps": 37885, "loss": 0.0011, "lr": 1.734184896568402e-06, "epoch": 1.5692226474858124, "percentage": 31.38, "elapsed_time": "0:17:28", "remaining_time": "0:38:11", "throughput": 5579.76, "total_tokens": 5848000} +{"current_steps": 11895, "total_steps": 37885, "loss": 0.1243, "lr": 1.7338720278664627e-06, "epoch": 1.5698825392635607, "percentage": 31.4, "elapsed_time": "0:17:28", "remaining_time": "0:38:10", "throughput": 5580.3, "total_tokens": 5850432} +{"current_steps": 11900, "total_steps": 37885, "loss": 0.0003, "lr": 1.7335590034054997e-06, "epoch": 1.5705424310413092, "percentage": 31.41, "elapsed_time": "0:17:28", "remaining_time": "0:38:10", "throughput": 5580.78, "total_tokens": 5852800} +{"current_steps": 11905, "total_steps": 37885, "loss": 0.0568, "lr": 1.7332458232519502e-06, "epoch": 1.5712023228190577, "percentage": 31.42, "elapsed_time": "0:17:29", "remaining_time": "0:38:09", "throughput": 5581.21, "total_tokens": 5855104} +{"current_steps": 11910, "total_steps": 37885, "loss": 0.0006, "lr": 1.7329324874722846e-06, "epoch": 1.571862214596806, "percentage": 31.44, "elapsed_time": "0:17:29", "remaining_time": "0:38:08", "throughput": 5581.86, "total_tokens": 5857664} +{"current_steps": 11915, "total_steps": 37885, "loss": 0.0007, "lr": 1.7326189961330058e-06, "epoch": 1.5725221063745547, "percentage": 31.45, "elapsed_time": "0:17:29", "remaining_time": "0:38:08", "throughput": 5582.22, "total_tokens": 5859904} +{"current_steps": 11920, "total_steps": 37885, "loss": 0.0809, "lr": 1.7323053493006505e-06, "epoch": 1.573181998152303, "percentage": 31.46, "elapsed_time": "0:17:30", "remaining_time": "0:38:07", "throughput": 5582.53, "total_tokens": 5862080} +{"current_steps": 11925, "total_steps": 37885, "loss": 0.0004, "lr": 1.7319915470417876e-06, "epoch": 1.5738418899300515, "percentage": 31.48, "elapsed_time": "0:17:30", "remaining_time": "0:38:06", "throughput": 5582.96, "total_tokens": 5864384} +{"current_steps": 11930, "total_steps": 37885, "loss": 0.191, "lr": 1.7316775894230197e-06, "epoch": 1.5745017817078, "percentage": 31.49, "elapsed_time": "0:17:30", "remaining_time": "0:38:05", "throughput": 5583.44, "total_tokens": 5866752} +{"current_steps": 11935, "total_steps": 37885, "loss": 0.0737, "lr": 1.7313634765109816e-06, "epoch": 1.5751616734855483, "percentage": 31.5, "elapsed_time": "0:17:31", "remaining_time": "0:38:05", "throughput": 5584.06, "total_tokens": 5869248} +{"current_steps": 11940, "total_steps": 37885, "loss": 0.0554, "lr": 1.731049208372342e-06, "epoch": 1.575821565263297, "percentage": 31.52, "elapsed_time": "0:17:31", "remaining_time": "0:38:04", "throughput": 5584.76, "total_tokens": 5871872} +{"current_steps": 11945, "total_steps": 37885, "loss": 0.1984, "lr": 1.7307347850738014e-06, "epoch": 1.5764814570410453, "percentage": 31.53, "elapsed_time": "0:17:31", "remaining_time": "0:38:03", "throughput": 5585.17, "total_tokens": 5874176} +{"current_steps": 11950, "total_steps": 37885, "loss": 0.0005, "lr": 1.7304202066820945e-06, "epoch": 1.5771413488187938, "percentage": 31.54, "elapsed_time": "0:17:32", "remaining_time": "0:38:03", "throughput": 5585.58, "total_tokens": 5876480} +{"current_steps": 11955, "total_steps": 37885, "loss": 0.0588, "lr": 1.7301054732639882e-06, "epoch": 1.5778012405965423, "percentage": 31.56, "elapsed_time": "0:17:32", "remaining_time": "0:38:02", "throughput": 5586.26, "total_tokens": 5879104} +{"current_steps": 11960, "total_steps": 37885, "loss": 0.1775, "lr": 1.729790584886282e-06, "epoch": 1.5784611323742905, "percentage": 31.57, "elapsed_time": "0:17:32", "remaining_time": "0:38:02", "throughput": 5587.09, "total_tokens": 5881856} +{"current_steps": 11965, "total_steps": 37885, "loss": 0.0426, "lr": 1.7294755416158089e-06, "epoch": 1.579121024152039, "percentage": 31.58, "elapsed_time": "0:17:33", "remaining_time": "0:38:01", "throughput": 5587.72, "total_tokens": 5884416} +{"current_steps": 11970, "total_steps": 37885, "loss": 0.1012, "lr": 1.7291603435194344e-06, "epoch": 1.5797809159297875, "percentage": 31.6, "elapsed_time": "0:17:33", "remaining_time": "0:38:00", "throughput": 5588.41, "total_tokens": 5887040} +{"current_steps": 11975, "total_steps": 37885, "loss": 0.001, "lr": 1.7288449906640571e-06, "epoch": 1.5804408077075358, "percentage": 31.61, "elapsed_time": "0:17:33", "remaining_time": "0:38:00", "throughput": 5588.98, "total_tokens": 5889536} +{"current_steps": 11980, "total_steps": 37885, "loss": 0.1094, "lr": 1.7285294831166087e-06, "epoch": 1.5811006994852845, "percentage": 31.62, "elapsed_time": "0:17:34", "remaining_time": "0:37:59", "throughput": 5589.28, "total_tokens": 5891712} +{"current_steps": 11985, "total_steps": 37885, "loss": 0.0588, "lr": 1.728213820944053e-06, "epoch": 1.5817605912630328, "percentage": 31.64, "elapsed_time": "0:17:34", "remaining_time": "0:37:58", "throughput": 5589.68, "total_tokens": 5894016} +{"current_steps": 11990, "total_steps": 37885, "loss": 0.0014, "lr": 1.727898004213387e-06, "epoch": 1.5824204830407813, "percentage": 31.65, "elapsed_time": "0:17:34", "remaining_time": "0:37:58", "throughput": 5590.12, "total_tokens": 5896320} +{"current_steps": 11995, "total_steps": 37885, "loss": 0.1045, "lr": 1.7275820329916408e-06, "epoch": 1.5830803748185298, "percentage": 31.66, "elapsed_time": "0:17:35", "remaining_time": "0:37:57", "throughput": 5590.77, "total_tokens": 5898880} +{"current_steps": 12000, "total_steps": 37885, "loss": 0.1356, "lr": 1.7272659073458766e-06, "epoch": 1.583740266596278, "percentage": 31.67, "elapsed_time": "0:17:35", "remaining_time": "0:37:56", "throughput": 5591.6, "total_tokens": 5901632} +{"current_steps": 12005, "total_steps": 37885, "loss": 0.0449, "lr": 1.7269496273431903e-06, "epoch": 1.5844001583740268, "percentage": 31.69, "elapsed_time": "0:17:35", "remaining_time": "0:37:56", "throughput": 5592.0, "total_tokens": 5903936} +{"current_steps": 12010, "total_steps": 37885, "loss": 0.0552, "lr": 1.7266331930507097e-06, "epoch": 1.585060050151775, "percentage": 31.7, "elapsed_time": "0:17:36", "remaining_time": "0:37:55", "throughput": 5592.38, "total_tokens": 5906176} +{"current_steps": 12015, "total_steps": 37885, "loss": 0.0456, "lr": 1.7263166045355954e-06, "epoch": 1.5857199419295236, "percentage": 31.71, "elapsed_time": "0:17:36", "remaining_time": "0:37:54", "throughput": 5593.17, "total_tokens": 5908864} +{"current_steps": 12020, "total_steps": 37885, "loss": 0.0356, "lr": 1.7259998618650418e-06, "epoch": 1.586379833707272, "percentage": 31.73, "elapsed_time": "0:17:36", "remaining_time": "0:37:53", "throughput": 5593.84, "total_tokens": 5911424} +{"current_steps": 12025, "total_steps": 37885, "loss": 0.0003, "lr": 1.7256829651062745e-06, "epoch": 1.5870397254850204, "percentage": 31.74, "elapsed_time": "0:17:37", "remaining_time": "0:37:53", "throughput": 5594.47, "total_tokens": 5913920} +{"current_steps": 12030, "total_steps": 37885, "loss": 0.2519, "lr": 1.725365914326553e-06, "epoch": 1.5876996172627689, "percentage": 31.75, "elapsed_time": "0:17:37", "remaining_time": "0:37:52", "throughput": 5594.86, "total_tokens": 5916160} +{"current_steps": 12035, "total_steps": 37885, "loss": 0.0035, "lr": 1.7250487095931687e-06, "epoch": 1.5883595090405174, "percentage": 31.77, "elapsed_time": "0:17:37", "remaining_time": "0:37:51", "throughput": 5595.49, "total_tokens": 5918656} +{"current_steps": 12040, "total_steps": 37885, "loss": 0.0008, "lr": 1.7247313509734465e-06, "epoch": 1.5890194008182656, "percentage": 31.78, "elapsed_time": "0:17:38", "remaining_time": "0:37:51", "throughput": 5596.05, "total_tokens": 5921088} +{"current_steps": 12045, "total_steps": 37885, "loss": 0.0678, "lr": 1.7244138385347429e-06, "epoch": 1.5896792925960144, "percentage": 31.79, "elapsed_time": "0:17:38", "remaining_time": "0:37:50", "throughput": 5596.57, "total_tokens": 5923456} +{"current_steps": 12050, "total_steps": 37885, "loss": 0.0005, "lr": 1.7240961723444479e-06, "epoch": 1.5903391843737626, "percentage": 31.81, "elapsed_time": "0:17:38", "remaining_time": "0:37:49", "throughput": 5597.13, "total_tokens": 5925888} +{"current_steps": 12055, "total_steps": 37885, "loss": 0.0975, "lr": 1.7237783524699836e-06, "epoch": 1.5909990761515111, "percentage": 31.82, "elapsed_time": "0:17:39", "remaining_time": "0:37:49", "throughput": 5597.76, "total_tokens": 5928384} +{"current_steps": 12060, "total_steps": 37885, "loss": 0.0006, "lr": 1.7234603789788054e-06, "epoch": 1.5916589679292596, "percentage": 31.83, "elapsed_time": "0:17:39", "remaining_time": "0:37:48", "throughput": 5598.37, "total_tokens": 5930880} +{"current_steps": 12065, "total_steps": 37885, "loss": 0.0004, "lr": 1.7231422519384008e-06, "epoch": 1.592318859707008, "percentage": 31.85, "elapsed_time": "0:17:39", "remaining_time": "0:37:47", "throughput": 5598.82, "total_tokens": 5933184} +{"current_steps": 12070, "total_steps": 37885, "loss": 0.2659, "lr": 1.7228239714162895e-06, "epoch": 1.5929787514847566, "percentage": 31.86, "elapsed_time": "0:17:40", "remaining_time": "0:37:47", "throughput": 5599.5, "total_tokens": 5935744} +{"current_steps": 12075, "total_steps": 37885, "loss": 0.0666, "lr": 1.7225055374800249e-06, "epoch": 1.593638643262505, "percentage": 31.87, "elapsed_time": "0:17:40", "remaining_time": "0:37:46", "throughput": 5600.17, "total_tokens": 5938304} +{"current_steps": 12080, "total_steps": 37885, "loss": 0.0895, "lr": 1.7221869501971917e-06, "epoch": 1.5942985350402534, "percentage": 31.89, "elapsed_time": "0:17:40", "remaining_time": "0:37:45", "throughput": 5600.96, "total_tokens": 5940992} +{"current_steps": 12085, "total_steps": 37885, "loss": 0.0722, "lr": 1.721868209635408e-06, "epoch": 1.594958426818002, "percentage": 31.9, "elapsed_time": "0:17:41", "remaining_time": "0:37:45", "throughput": 5601.62, "total_tokens": 5943552} +{"current_steps": 12090, "total_steps": 37885, "loss": 0.0022, "lr": 1.7215493158623242e-06, "epoch": 1.5956183185957502, "percentage": 31.91, "elapsed_time": "0:17:41", "remaining_time": "0:37:44", "throughput": 5602.35, "total_tokens": 5946176} +{"current_steps": 12095, "total_steps": 37885, "loss": 0.0013, "lr": 1.7212302689456234e-06, "epoch": 1.5962782103734987, "percentage": 31.93, "elapsed_time": "0:17:41", "remaining_time": "0:37:43", "throughput": 5603.07, "total_tokens": 5948800} +{"current_steps": 12100, "total_steps": 37885, "loss": 0.0489, "lr": 1.72091106895302e-06, "epoch": 1.5969381021512472, "percentage": 31.94, "elapsed_time": "0:17:42", "remaining_time": "0:37:43", "throughput": 5603.68, "total_tokens": 5951296} +{"current_steps": 12105, "total_steps": 37885, "loss": 0.0734, "lr": 1.7205917159522635e-06, "epoch": 1.5975979939289955, "percentage": 31.95, "elapsed_time": "0:17:42", "remaining_time": "0:37:42", "throughput": 5604.13, "total_tokens": 5953600} +{"current_steps": 12110, "total_steps": 37885, "loss": 0.0671, "lr": 1.7202722100111328e-06, "epoch": 1.5982578857067442, "percentage": 31.97, "elapsed_time": "0:17:42", "remaining_time": "0:37:41", "throughput": 5604.84, "total_tokens": 5956224} +{"current_steps": 12115, "total_steps": 37885, "loss": 0.1586, "lr": 1.7199525511974417e-06, "epoch": 1.5989177774844925, "percentage": 31.98, "elapsed_time": "0:17:43", "remaining_time": "0:37:41", "throughput": 5605.34, "total_tokens": 5958656} +{"current_steps": 12120, "total_steps": 37885, "loss": 0.0006, "lr": 1.7196327395790352e-06, "epoch": 1.599577669262241, "percentage": 31.99, "elapsed_time": "0:17:43", "remaining_time": "0:37:40", "throughput": 5605.84, "total_tokens": 5961088} +{"current_steps": 12125, "total_steps": 37885, "loss": 0.1002, "lr": 1.7193127752237906e-06, "epoch": 1.6002375610399895, "percentage": 32.0, "elapsed_time": "0:17:43", "remaining_time": "0:37:39", "throughput": 5606.33, "total_tokens": 5963520} +{"current_steps": 12130, "total_steps": 37885, "loss": 0.0006, "lr": 1.7189926581996184e-06, "epoch": 1.6008974528177378, "percentage": 32.02, "elapsed_time": "0:17:44", "remaining_time": "0:37:39", "throughput": 5606.78, "total_tokens": 5965888} +{"current_steps": 12135, "total_steps": 37885, "loss": 0.0669, "lr": 1.7186723885744609e-06, "epoch": 1.6015573445954865, "percentage": 32.03, "elapsed_time": "0:17:44", "remaining_time": "0:37:38", "throughput": 5607.06, "total_tokens": 5968064} +{"current_steps": 12140, "total_steps": 37885, "loss": 0.138, "lr": 1.7183519664162934e-06, "epoch": 1.6022172363732348, "percentage": 32.04, "elapsed_time": "0:17:44", "remaining_time": "0:37:37", "throughput": 5607.62, "total_tokens": 5970560} +{"current_steps": 12145, "total_steps": 37885, "loss": 0.1324, "lr": 1.7180313917931226e-06, "epoch": 1.6028771281509833, "percentage": 32.06, "elapsed_time": "0:17:45", "remaining_time": "0:37:37", "throughput": 5608.35, "total_tokens": 5973248} +{"current_steps": 12150, "total_steps": 37885, "loss": 0.0511, "lr": 1.717710664772989e-06, "epoch": 1.6035370199287318, "percentage": 32.07, "elapsed_time": "0:17:45", "remaining_time": "0:37:36", "throughput": 5608.95, "total_tokens": 5975808} +{"current_steps": 12155, "total_steps": 37885, "loss": 0.0011, "lr": 1.7173897854239635e-06, "epoch": 1.60419691170648, "percentage": 32.08, "elapsed_time": "0:17:45", "remaining_time": "0:37:35", "throughput": 5609.38, "total_tokens": 5978176} +{"current_steps": 12160, "total_steps": 37885, "loss": 0.0115, "lr": 1.7170687538141512e-06, "epoch": 1.6048568034842285, "percentage": 32.1, "elapsed_time": "0:17:46", "remaining_time": "0:37:35", "throughput": 5609.88, "total_tokens": 5980608} +{"current_steps": 12165, "total_steps": 37885, "loss": 0.0712, "lr": 1.7167475700116882e-06, "epoch": 1.605516695261977, "percentage": 32.11, "elapsed_time": "0:17:46", "remaining_time": "0:37:34", "throughput": 5610.72, "total_tokens": 5983424} +{"current_steps": 12170, "total_steps": 37885, "loss": 0.1335, "lr": 1.7164262340847442e-06, "epoch": 1.6061765870397253, "percentage": 32.12, "elapsed_time": "0:17:46", "remaining_time": "0:37:34", "throughput": 5611.1, "total_tokens": 5985728} +{"current_steps": 12175, "total_steps": 37885, "loss": 0.0008, "lr": 1.71610474610152e-06, "epoch": 1.606836478817474, "percentage": 32.14, "elapsed_time": "0:17:47", "remaining_time": "0:37:33", "throughput": 5611.71, "total_tokens": 5988288} +{"current_steps": 12180, "total_steps": 37885, "loss": 0.0775, "lr": 1.7157831061302485e-06, "epoch": 1.6074963705952223, "percentage": 32.15, "elapsed_time": "0:17:47", "remaining_time": "0:37:32", "throughput": 5612.63, "total_tokens": 5991232} +{"current_steps": 12185, "total_steps": 37885, "loss": 0.0946, "lr": 1.7154613142391968e-06, "epoch": 1.6081562623729708, "percentage": 32.16, "elapsed_time": "0:17:47", "remaining_time": "0:37:32", "throughput": 5613.17, "total_tokens": 5993728} +{"current_steps": 12190, "total_steps": 37885, "loss": 0.0603, "lr": 1.7151393704966616e-06, "epoch": 1.6088161541507193, "percentage": 32.18, "elapsed_time": "0:17:48", "remaining_time": "0:37:31", "throughput": 5613.77, "total_tokens": 5996288} +{"current_steps": 12195, "total_steps": 37885, "loss": 0.1577, "lr": 1.7148172749709736e-06, "epoch": 1.6094760459284676, "percentage": 32.19, "elapsed_time": "0:17:48", "remaining_time": "0:37:30", "throughput": 5614.4, "total_tokens": 5998848} +{"current_steps": 12200, "total_steps": 37885, "loss": 0.0006, "lr": 1.7144950277304955e-06, "epoch": 1.6101359377062163, "percentage": 32.2, "elapsed_time": "0:17:48", "remaining_time": "0:37:30", "throughput": 5615.28, "total_tokens": 6001728} +{"current_steps": 12205, "total_steps": 37885, "loss": 0.0347, "lr": 1.7141726288436216e-06, "epoch": 1.6107958294839646, "percentage": 32.22, "elapsed_time": "0:17:49", "remaining_time": "0:37:29", "throughput": 5616.07, "total_tokens": 6004480} +{"current_steps": 12210, "total_steps": 37885, "loss": 0.0302, "lr": 1.713850078378779e-06, "epoch": 1.611455721261713, "percentage": 32.23, "elapsed_time": "0:17:49", "remaining_time": "0:37:28", "throughput": 5616.58, "total_tokens": 6006912} +{"current_steps": 12215, "total_steps": 37885, "loss": 0.1457, "lr": 1.7135273764044262e-06, "epoch": 1.6121156130394616, "percentage": 32.24, "elapsed_time": "0:17:49", "remaining_time": "0:37:28", "throughput": 5617.1, "total_tokens": 6009344} +{"current_steps": 12220, "total_steps": 37885, "loss": 0.0659, "lr": 1.7132045229890552e-06, "epoch": 1.6127755048172099, "percentage": 32.26, "elapsed_time": "0:17:50", "remaining_time": "0:37:27", "throughput": 5617.6, "total_tokens": 6011776} +{"current_steps": 12225, "total_steps": 37885, "loss": 0.1002, "lr": 1.7128815182011886e-06, "epoch": 1.6134353965949584, "percentage": 32.27, "elapsed_time": "0:17:50", "remaining_time": "0:37:26", "throughput": 5617.97, "total_tokens": 6014080} +{"current_steps": 12230, "total_steps": 37885, "loss": 0.1825, "lr": 1.7125583621093819e-06, "epoch": 1.6140952883727069, "percentage": 32.28, "elapsed_time": "0:17:50", "remaining_time": "0:37:26", "throughput": 5618.56, "total_tokens": 6016640} +{"current_steps": 12235, "total_steps": 37885, "loss": 0.0623, "lr": 1.712235054782223e-06, "epoch": 1.6147551801504552, "percentage": 32.3, "elapsed_time": "0:17:51", "remaining_time": "0:37:25", "throughput": 5618.87, "total_tokens": 6018816} +{"current_steps": 12240, "total_steps": 37885, "loss": 0.001, "lr": 1.7119115962883313e-06, "epoch": 1.6154150719282039, "percentage": 32.31, "elapsed_time": "0:17:51", "remaining_time": "0:37:24", "throughput": 5619.48, "total_tokens": 6021312} +{"current_steps": 12245, "total_steps": 37885, "loss": 0.0006, "lr": 1.7115879866963586e-06, "epoch": 1.6160749637059522, "percentage": 32.32, "elapsed_time": "0:17:51", "remaining_time": "0:37:24", "throughput": 5619.92, "total_tokens": 6023616} +{"current_steps": 12250, "total_steps": 37885, "loss": 0.1983, "lr": 1.7112642260749885e-06, "epoch": 1.6167348554837007, "percentage": 32.33, "elapsed_time": "0:17:52", "remaining_time": "0:37:23", "throughput": 5620.52, "total_tokens": 6026112} +{"current_steps": 12255, "total_steps": 37885, "loss": 0.0004, "lr": 1.7109403144929369e-06, "epoch": 1.6173947472614492, "percentage": 32.35, "elapsed_time": "0:17:52", "remaining_time": "0:37:23", "throughput": 5621.05, "total_tokens": 6028544} +{"current_steps": 12260, "total_steps": 37885, "loss": 0.0435, "lr": 1.7106162520189522e-06, "epoch": 1.6180546390391974, "percentage": 32.36, "elapsed_time": "0:17:52", "remaining_time": "0:37:22", "throughput": 5621.69, "total_tokens": 6031104} +{"current_steps": 12265, "total_steps": 37885, "loss": 0.1353, "lr": 1.7102920387218136e-06, "epoch": 1.6187145308169462, "percentage": 32.37, "elapsed_time": "0:17:53", "remaining_time": "0:37:21", "throughput": 5622.38, "total_tokens": 6033728} +{"current_steps": 12270, "total_steps": 37885, "loss": 0.2136, "lr": 1.7099676746703332e-06, "epoch": 1.6193744225946944, "percentage": 32.39, "elapsed_time": "0:17:53", "remaining_time": "0:37:21", "throughput": 5623.09, "total_tokens": 6036352} +{"current_steps": 12275, "total_steps": 37885, "loss": 0.1958, "lr": 1.7096431599333552e-06, "epoch": 1.620034314372443, "percentage": 32.4, "elapsed_time": "0:17:53", "remaining_time": "0:37:20", "throughput": 5623.72, "total_tokens": 6038912} +{"current_steps": 12280, "total_steps": 37885, "loss": 0.0934, "lr": 1.709318494579755e-06, "epoch": 1.6206942061501914, "percentage": 32.41, "elapsed_time": "0:17:54", "remaining_time": "0:37:19", "throughput": 5624.36, "total_tokens": 6041472} +{"current_steps": 12285, "total_steps": 37885, "loss": 0.0024, "lr": 1.7089936786784414e-06, "epoch": 1.6213540979279397, "percentage": 32.43, "elapsed_time": "0:17:54", "remaining_time": "0:37:19", "throughput": 5624.62, "total_tokens": 6043584} +{"current_steps": 12290, "total_steps": 37885, "loss": 0.059, "lr": 1.708668712298353e-06, "epoch": 1.6220139897056882, "percentage": 32.44, "elapsed_time": "0:17:54", "remaining_time": "0:37:18", "throughput": 5625.26, "total_tokens": 6046144} +{"current_steps": 12295, "total_steps": 37885, "loss": 0.0014, "lr": 1.7083435955084627e-06, "epoch": 1.6226738814834367, "percentage": 32.45, "elapsed_time": "0:17:55", "remaining_time": "0:37:17", "throughput": 5625.74, "total_tokens": 6048512} +{"current_steps": 12300, "total_steps": 37885, "loss": 0.0526, "lr": 1.7080183283777733e-06, "epoch": 1.6233337732611852, "percentage": 32.47, "elapsed_time": "0:17:55", "remaining_time": "0:37:17", "throughput": 5625.91, "total_tokens": 6050560} +{"current_steps": 12305, "total_steps": 37885, "loss": 0.0014, "lr": 1.707692910975321e-06, "epoch": 1.6239936650389337, "percentage": 32.48, "elapsed_time": "0:17:55", "remaining_time": "0:37:16", "throughput": 5626.51, "total_tokens": 6053056} +{"current_steps": 12310, "total_steps": 37885, "loss": 0.0247, "lr": 1.7073673433701733e-06, "epoch": 1.624653556816682, "percentage": 32.49, "elapsed_time": "0:17:56", "remaining_time": "0:37:15", "throughput": 5627.17, "total_tokens": 6055616} +{"current_steps": 12315, "total_steps": 37885, "loss": 0.0355, "lr": 1.7070416256314286e-06, "epoch": 1.6253134485944305, "percentage": 32.51, "elapsed_time": "0:17:56", "remaining_time": "0:37:15", "throughput": 5627.42, "total_tokens": 6057728} +{"current_steps": 12320, "total_steps": 37885, "loss": 0.0661, "lr": 1.7067157578282195e-06, "epoch": 1.625973340372179, "percentage": 32.52, "elapsed_time": "0:17:56", "remaining_time": "0:37:14", "throughput": 5628.08, "total_tokens": 6060288} +{"current_steps": 12325, "total_steps": 37885, "loss": 0.0912, "lr": 1.7063897400297083e-06, "epoch": 1.6266332321499273, "percentage": 32.53, "elapsed_time": "0:17:57", "remaining_time": "0:37:13", "throughput": 5628.55, "total_tokens": 6062656} +{"current_steps": 12330, "total_steps": 37885, "loss": 0.0712, "lr": 1.7060635723050899e-06, "epoch": 1.627293123927676, "percentage": 32.55, "elapsed_time": "0:17:57", "remaining_time": "0:37:13", "throughput": 5629.08, "total_tokens": 6065088} +{"current_steps": 12335, "total_steps": 37885, "loss": 0.0004, "lr": 1.705737254723591e-06, "epoch": 1.6279530157054243, "percentage": 32.56, "elapsed_time": "0:17:57", "remaining_time": "0:37:12", "throughput": 5629.51, "total_tokens": 6067392} +{"current_steps": 12340, "total_steps": 37885, "loss": 0.1991, "lr": 1.7054107873544704e-06, "epoch": 1.6286129074831728, "percentage": 32.57, "elapsed_time": "0:17:58", "remaining_time": "0:37:11", "throughput": 5630.01, "total_tokens": 6069760} +{"current_steps": 12345, "total_steps": 37885, "loss": 0.0003, "lr": 1.7050841702670188e-06, "epoch": 1.6292727992609213, "percentage": 32.59, "elapsed_time": "0:17:58", "remaining_time": "0:37:11", "throughput": 5630.6, "total_tokens": 6072256} +{"current_steps": 12350, "total_steps": 37885, "loss": 0.0482, "lr": 1.7047574035305576e-06, "epoch": 1.6299326910386696, "percentage": 32.6, "elapsed_time": "0:17:58", "remaining_time": "0:37:10", "throughput": 5631.14, "total_tokens": 6074688} +{"current_steps": 12355, "total_steps": 37885, "loss": 0.0002, "lr": 1.704430487214441e-06, "epoch": 1.630592582816418, "percentage": 32.61, "elapsed_time": "0:17:59", "remaining_time": "0:37:09", "throughput": 5631.74, "total_tokens": 6077184} +{"current_steps": 12360, "total_steps": 37885, "loss": 0.001, "lr": 1.7041034213880545e-06, "epoch": 1.6312524745941666, "percentage": 32.63, "elapsed_time": "0:17:59", "remaining_time": "0:37:09", "throughput": 5632.1, "total_tokens": 6079424} +{"current_steps": 12365, "total_steps": 37885, "loss": 0.0385, "lr": 1.7037762061208157e-06, "epoch": 1.631912366371915, "percentage": 32.64, "elapsed_time": "0:17:59", "remaining_time": "0:37:08", "throughput": 5632.7, "total_tokens": 6081920} +{"current_steps": 12370, "total_steps": 37885, "loss": 0.115, "lr": 1.7034488414821734e-06, "epoch": 1.6325722581496636, "percentage": 32.65, "elapsed_time": "0:18:00", "remaining_time": "0:37:07", "throughput": 5633.24, "total_tokens": 6084352} +{"current_steps": 12375, "total_steps": 37885, "loss": 0.0007, "lr": 1.7031213275416083e-06, "epoch": 1.6332321499274118, "percentage": 32.66, "elapsed_time": "0:18:00", "remaining_time": "0:37:07", "throughput": 5633.83, "total_tokens": 6086848} +{"current_steps": 12380, "total_steps": 37885, "loss": 0.1038, "lr": 1.702793664368633e-06, "epoch": 1.6338920417051603, "percentage": 32.68, "elapsed_time": "0:18:00", "remaining_time": "0:37:06", "throughput": 5634.42, "total_tokens": 6089344} +{"current_steps": 12385, "total_steps": 37885, "loss": 0.0722, "lr": 1.702465852032792e-06, "epoch": 1.6345519334829088, "percentage": 32.69, "elapsed_time": "0:18:01", "remaining_time": "0:37:05", "throughput": 5635.01, "total_tokens": 6091840} +{"current_steps": 12390, "total_steps": 37885, "loss": 0.0003, "lr": 1.7021378906036607e-06, "epoch": 1.6352118252606571, "percentage": 32.7, "elapsed_time": "0:18:01", "remaining_time": "0:37:05", "throughput": 5635.44, "total_tokens": 6094144} +{"current_steps": 12395, "total_steps": 37885, "loss": 0.0003, "lr": 1.7018097801508467e-06, "epoch": 1.6358717170384058, "percentage": 32.72, "elapsed_time": "0:18:01", "remaining_time": "0:37:04", "throughput": 5635.85, "total_tokens": 6096448} +{"current_steps": 12400, "total_steps": 37885, "loss": 0.1521, "lr": 1.7014815207439884e-06, "epoch": 1.6365316088161541, "percentage": 32.73, "elapsed_time": "0:18:02", "remaining_time": "0:37:03", "throughput": 5636.31, "total_tokens": 6098816} +{"current_steps": 12405, "total_steps": 37885, "loss": 0.1457, "lr": 1.7011531124527578e-06, "epoch": 1.6371915005939026, "percentage": 32.74, "elapsed_time": "0:18:02", "remaining_time": "0:37:03", "throughput": 5636.9, "total_tokens": 6101312} +{"current_steps": 12410, "total_steps": 37885, "loss": 0.0004, "lr": 1.7008245553468559e-06, "epoch": 1.6378513923716511, "percentage": 32.76, "elapsed_time": "0:18:02", "remaining_time": "0:37:02", "throughput": 5637.61, "total_tokens": 6103936} +{"current_steps": 12415, "total_steps": 37885, "loss": 0.0008, "lr": 1.7004958494960173e-06, "epoch": 1.6385112841493994, "percentage": 32.77, "elapsed_time": "0:18:03", "remaining_time": "0:37:01", "throughput": 5638.33, "total_tokens": 6106624} +{"current_steps": 12420, "total_steps": 37885, "loss": 0.0675, "lr": 1.7001669949700073e-06, "epoch": 1.639171175927148, "percentage": 32.78, "elapsed_time": "0:18:03", "remaining_time": "0:37:01", "throughput": 5639.18, "total_tokens": 6109440} +{"current_steps": 12425, "total_steps": 37885, "loss": 0.0088, "lr": 1.6998379918386228e-06, "epoch": 1.6398310677048964, "percentage": 32.8, "elapsed_time": "0:18:03", "remaining_time": "0:37:00", "throughput": 5639.54, "total_tokens": 6111680} +{"current_steps": 12430, "total_steps": 37885, "loss": 0.1804, "lr": 1.6995088401716922e-06, "epoch": 1.640490959482645, "percentage": 32.81, "elapsed_time": "0:18:04", "remaining_time": "0:36:59", "throughput": 5640.07, "total_tokens": 6114112} +{"current_steps": 12435, "total_steps": 37885, "loss": 0.1808, "lr": 1.6991795400390756e-06, "epoch": 1.6411508512603934, "percentage": 32.82, "elapsed_time": "0:18:04", "remaining_time": "0:36:59", "throughput": 5640.6, "total_tokens": 6116544} +{"current_steps": 12440, "total_steps": 37885, "loss": 0.001, "lr": 1.698850091510665e-06, "epoch": 1.6418107430381417, "percentage": 32.84, "elapsed_time": "0:18:04", "remaining_time": "0:36:58", "throughput": 5641.22, "total_tokens": 6119104} +{"current_steps": 12445, "total_steps": 37885, "loss": 0.1505, "lr": 1.6985204946563831e-06, "epoch": 1.6424706348158902, "percentage": 32.85, "elapsed_time": "0:18:05", "remaining_time": "0:36:58", "throughput": 5641.64, "total_tokens": 6121408} +{"current_steps": 12450, "total_steps": 37885, "loss": 0.0506, "lr": 1.6981907495461845e-06, "epoch": 1.6431305265936387, "percentage": 32.86, "elapsed_time": "0:18:05", "remaining_time": "0:36:57", "throughput": 5642.37, "total_tokens": 6124096} +{"current_steps": 12455, "total_steps": 37885, "loss": 0.001, "lr": 1.697860856250055e-06, "epoch": 1.643790418371387, "percentage": 32.88, "elapsed_time": "0:18:05", "remaining_time": "0:36:56", "throughput": 5642.6, "total_tokens": 6126208} +{"current_steps": 12460, "total_steps": 37885, "loss": 0.1091, "lr": 1.6975308148380125e-06, "epoch": 1.6444503101491357, "percentage": 32.89, "elapsed_time": "0:18:06", "remaining_time": "0:36:56", "throughput": 5643.19, "total_tokens": 6128704} +{"current_steps": 12465, "total_steps": 37885, "loss": 0.0021, "lr": 1.6972006253801055e-06, "epoch": 1.645110201926884, "percentage": 32.9, "elapsed_time": "0:18:06", "remaining_time": "0:36:55", "throughput": 5643.55, "total_tokens": 6130944} +{"current_steps": 12470, "total_steps": 37885, "loss": 0.1985, "lr": 1.6968702879464148e-06, "epoch": 1.6457700937046325, "percentage": 32.92, "elapsed_time": "0:18:06", "remaining_time": "0:36:54", "throughput": 5643.96, "total_tokens": 6133248} +{"current_steps": 12475, "total_steps": 37885, "loss": 0.2054, "lr": 1.6965398026070517e-06, "epoch": 1.646429985482381, "percentage": 32.93, "elapsed_time": "0:18:07", "remaining_time": "0:36:54", "throughput": 5644.56, "total_tokens": 6135744} +{"current_steps": 12480, "total_steps": 37885, "loss": 0.0839, "lr": 1.6962091694321595e-06, "epoch": 1.6470898772601292, "percentage": 32.94, "elapsed_time": "0:18:07", "remaining_time": "0:36:53", "throughput": 5645.03, "total_tokens": 6138112} +{"current_steps": 12485, "total_steps": 37885, "loss": 0.1114, "lr": 1.6958783884919124e-06, "epoch": 1.647749769037878, "percentage": 32.95, "elapsed_time": "0:18:07", "remaining_time": "0:36:52", "throughput": 5645.61, "total_tokens": 6140608} +{"current_steps": 12490, "total_steps": 37885, "loss": 0.0034, "lr": 1.6955474598565163e-06, "epoch": 1.6484096608156262, "percentage": 32.97, "elapsed_time": "0:18:08", "remaining_time": "0:36:52", "throughput": 5646.18, "total_tokens": 6143104} +{"current_steps": 12495, "total_steps": 37885, "loss": 0.0456, "lr": 1.695216383596209e-06, "epoch": 1.6490695525933747, "percentage": 32.98, "elapsed_time": "0:18:08", "remaining_time": "0:36:51", "throughput": 5646.72, "total_tokens": 6145536} +{"current_steps": 12500, "total_steps": 37885, "loss": 0.0014, "lr": 1.6948851597812586e-06, "epoch": 1.6497294443711232, "percentage": 32.99, "elapsed_time": "0:18:08", "remaining_time": "0:36:50", "throughput": 5647.35, "total_tokens": 6148096} +{"current_steps": 12505, "total_steps": 37885, "loss": 0.1411, "lr": 1.694553788481965e-06, "epoch": 1.6503893361488715, "percentage": 33.01, "elapsed_time": "0:18:09", "remaining_time": "0:36:50", "throughput": 5648.25, "total_tokens": 6150976} +{"current_steps": 12510, "total_steps": 37885, "loss": 0.1109, "lr": 1.6942222697686593e-06, "epoch": 1.65104922792662, "percentage": 33.02, "elapsed_time": "0:18:09", "remaining_time": "0:36:49", "throughput": 5648.79, "total_tokens": 6153408} +{"current_steps": 12515, "total_steps": 37885, "loss": 0.0007, "lr": 1.6938906037117039e-06, "epoch": 1.6517091197043685, "percentage": 33.03, "elapsed_time": "0:18:09", "remaining_time": "0:36:48", "throughput": 5649.47, "total_tokens": 6156032} +{"current_steps": 12520, "total_steps": 37885, "loss": 0.003, "lr": 1.6935587903814926e-06, "epoch": 1.6523690114821168, "percentage": 33.05, "elapsed_time": "0:18:10", "remaining_time": "0:36:48", "throughput": 5650.25, "total_tokens": 6158784} +{"current_steps": 12525, "total_steps": 37885, "loss": 0.0005, "lr": 1.6932268298484508e-06, "epoch": 1.6530289032598655, "percentage": 33.06, "elapsed_time": "0:18:10", "remaining_time": "0:36:47", "throughput": 5650.92, "total_tokens": 6161408} +{"current_steps": 12530, "total_steps": 37885, "loss": 0.0756, "lr": 1.692894722183034e-06, "epoch": 1.6536887950376138, "percentage": 33.07, "elapsed_time": "0:18:10", "remaining_time": "0:36:47", "throughput": 5651.35, "total_tokens": 6163712} +{"current_steps": 12535, "total_steps": 37885, "loss": 0.0003, "lr": 1.6925624674557298e-06, "epoch": 1.6543486868153623, "percentage": 33.09, "elapsed_time": "0:18:10", "remaining_time": "0:36:46", "throughput": 5651.71, "total_tokens": 6165952} +{"current_steps": 12540, "total_steps": 37885, "loss": 0.0918, "lr": 1.6922300657370573e-06, "epoch": 1.6550085785931108, "percentage": 33.1, "elapsed_time": "0:18:11", "remaining_time": "0:36:45", "throughput": 5652.08, "total_tokens": 6168192} +{"current_steps": 12545, "total_steps": 37885, "loss": 0.0002, "lr": 1.691897517097566e-06, "epoch": 1.655668470370859, "percentage": 33.11, "elapsed_time": "0:18:11", "remaining_time": "0:36:45", "throughput": 5652.49, "total_tokens": 6170496} +{"current_steps": 12550, "total_steps": 37885, "loss": 0.0005, "lr": 1.6915648216078374e-06, "epoch": 1.6563283621486078, "percentage": 33.13, "elapsed_time": "0:18:11", "remaining_time": "0:36:44", "throughput": 5653.23, "total_tokens": 6173184} +{"current_steps": 12555, "total_steps": 37885, "loss": 0.0884, "lr": 1.691231979338483e-06, "epoch": 1.656988253926356, "percentage": 33.14, "elapsed_time": "0:18:12", "remaining_time": "0:36:43", "throughput": 5653.71, "total_tokens": 6175552} +{"current_steps": 12560, "total_steps": 37885, "loss": 0.1239, "lr": 1.690898990360146e-06, "epoch": 1.6576481457041046, "percentage": 33.15, "elapsed_time": "0:18:12", "remaining_time": "0:36:43", "throughput": 5654.28, "total_tokens": 6178048} +{"current_steps": 12565, "total_steps": 37885, "loss": 0.1286, "lr": 1.690565854743502e-06, "epoch": 1.658308037481853, "percentage": 33.17, "elapsed_time": "0:18:12", "remaining_time": "0:36:42", "throughput": 5654.85, "total_tokens": 6180544} +{"current_steps": 12570, "total_steps": 37885, "loss": 0.0003, "lr": 1.690232572559256e-06, "epoch": 1.6589679292596013, "percentage": 33.18, "elapsed_time": "0:18:13", "remaining_time": "0:36:41", "throughput": 5655.44, "total_tokens": 6183040} +{"current_steps": 12575, "total_steps": 37885, "loss": 0.1679, "lr": 1.6898991438781445e-06, "epoch": 1.6596278210373498, "percentage": 33.19, "elapsed_time": "0:18:13", "remaining_time": "0:36:41", "throughput": 5656.37, "total_tokens": 6185984} +{"current_steps": 12580, "total_steps": 37885, "loss": 0.0464, "lr": 1.6895655687709356e-06, "epoch": 1.6602877128150983, "percentage": 33.21, "elapsed_time": "0:18:13", "remaining_time": "0:36:40", "throughput": 5656.97, "total_tokens": 6188480} +{"current_steps": 12585, "total_steps": 37885, "loss": 0.071, "lr": 1.6892318473084283e-06, "epoch": 1.6609476045928466, "percentage": 33.22, "elapsed_time": "0:18:14", "remaining_time": "0:36:39", "throughput": 5657.65, "total_tokens": 6191104} +{"current_steps": 12590, "total_steps": 37885, "loss": 0.404, "lr": 1.6888979795614524e-06, "epoch": 1.6616074963705953, "percentage": 33.23, "elapsed_time": "0:18:14", "remaining_time": "0:36:39", "throughput": 5658.28, "total_tokens": 6193664} +{"current_steps": 12595, "total_steps": 37885, "loss": 0.1, "lr": 1.688563965600869e-06, "epoch": 1.6622673881483436, "percentage": 33.25, "elapsed_time": "0:18:14", "remaining_time": "0:36:38", "throughput": 5659.13, "total_tokens": 6196480} +{"current_steps": 12600, "total_steps": 37885, "loss": 0.1222, "lr": 1.68822980549757e-06, "epoch": 1.6629272799260921, "percentage": 33.26, "elapsed_time": "0:18:15", "remaining_time": "0:36:37", "throughput": 5659.81, "total_tokens": 6199104} +{"current_steps": 12605, "total_steps": 37885, "loss": 0.0012, "lr": 1.6878954993224786e-06, "epoch": 1.6635871717038406, "percentage": 33.27, "elapsed_time": "0:18:15", "remaining_time": "0:36:37", "throughput": 5660.11, "total_tokens": 6201280} +{"current_steps": 12610, "total_steps": 37885, "loss": 0.0029, "lr": 1.687561047146549e-06, "epoch": 1.664247063481589, "percentage": 33.28, "elapsed_time": "0:18:15", "remaining_time": "0:36:36", "throughput": 5660.69, "total_tokens": 6203776} +{"current_steps": 12615, "total_steps": 37885, "loss": 0.1658, "lr": 1.6872264490407656e-06, "epoch": 1.6649069552593376, "percentage": 33.3, "elapsed_time": "0:18:16", "remaining_time": "0:36:36", "throughput": 5661.43, "total_tokens": 6206464} +{"current_steps": 12620, "total_steps": 37885, "loss": 0.003, "lr": 1.686891705076145e-06, "epoch": 1.665566847037086, "percentage": 33.31, "elapsed_time": "0:18:16", "remaining_time": "0:36:35", "throughput": 5661.94, "total_tokens": 6208896} +{"current_steps": 12625, "total_steps": 37885, "loss": 0.061, "lr": 1.6865568153237343e-06, "epoch": 1.6662267388148344, "percentage": 33.32, "elapsed_time": "0:18:16", "remaining_time": "0:36:34", "throughput": 5662.3, "total_tokens": 6211136} +{"current_steps": 12630, "total_steps": 37885, "loss": 0.0895, "lr": 1.6862217798546115e-06, "epoch": 1.666886630592583, "percentage": 33.34, "elapsed_time": "0:18:17", "remaining_time": "0:36:34", "throughput": 5662.82, "total_tokens": 6213568} +{"current_steps": 12635, "total_steps": 37885, "loss": 0.2668, "lr": 1.6858865987398847e-06, "epoch": 1.6675465223703312, "percentage": 33.35, "elapsed_time": "0:18:17", "remaining_time": "0:36:33", "throughput": 5663.3, "total_tokens": 6215936} +{"current_steps": 12640, "total_steps": 37885, "loss": 0.0006, "lr": 1.6855512720506941e-06, "epoch": 1.6682064141480797, "percentage": 33.36, "elapsed_time": "0:18:17", "remaining_time": "0:36:32", "throughput": 5663.99, "total_tokens": 6218560} +{"current_steps": 12645, "total_steps": 37885, "loss": 0.1121, "lr": 1.6852157998582106e-06, "epoch": 1.6688663059258282, "percentage": 33.38, "elapsed_time": "0:18:18", "remaining_time": "0:36:32", "throughput": 5664.68, "total_tokens": 6221184} +{"current_steps": 12650, "total_steps": 37885, "loss": 0.0836, "lr": 1.6848801822336355e-06, "epoch": 1.6695261977035765, "percentage": 33.39, "elapsed_time": "0:18:18", "remaining_time": "0:36:31", "throughput": 5665.15, "total_tokens": 6223552} +{"current_steps": 12655, "total_steps": 37885, "loss": 0.0497, "lr": 1.684544419248201e-06, "epoch": 1.6701860894813252, "percentage": 33.4, "elapsed_time": "0:18:18", "remaining_time": "0:36:30", "throughput": 5665.92, "total_tokens": 6226304} +{"current_steps": 12660, "total_steps": 37885, "loss": 0.1245, "lr": 1.6842085109731708e-06, "epoch": 1.6708459812590735, "percentage": 33.42, "elapsed_time": "0:18:19", "remaining_time": "0:36:30", "throughput": 5666.53, "total_tokens": 6228864} +{"current_steps": 12665, "total_steps": 37885, "loss": 0.0763, "lr": 1.6838724574798387e-06, "epoch": 1.671505873036822, "percentage": 33.43, "elapsed_time": "0:18:19", "remaining_time": "0:36:29", "throughput": 5667.25, "total_tokens": 6231552} +{"current_steps": 12670, "total_steps": 37885, "loss": 0.0017, "lr": 1.6835362588395298e-06, "epoch": 1.6721657648145705, "percentage": 33.44, "elapsed_time": "0:18:19", "remaining_time": "0:36:28", "throughput": 5667.65, "total_tokens": 6233856} +{"current_steps": 12675, "total_steps": 37885, "loss": 0.0827, "lr": 1.6831999151235995e-06, "epoch": 1.6728256565923187, "percentage": 33.46, "elapsed_time": "0:18:20", "remaining_time": "0:36:28", "throughput": 5668.68, "total_tokens": 6236928} +{"current_steps": 12680, "total_steps": 37885, "loss": 0.0704, "lr": 1.682863426403435e-06, "epoch": 1.6734855483700675, "percentage": 33.47, "elapsed_time": "0:18:20", "remaining_time": "0:36:27", "throughput": 5669.35, "total_tokens": 6239552} +{"current_steps": 12685, "total_steps": 37885, "loss": 0.0288, "lr": 1.682526792750453e-06, "epoch": 1.6741454401478157, "percentage": 33.48, "elapsed_time": "0:18:20", "remaining_time": "0:36:27", "throughput": 5669.81, "total_tokens": 6241920} +{"current_steps": 12690, "total_steps": 37885, "loss": 0.2077, "lr": 1.6821900142361015e-06, "epoch": 1.6748053319255642, "percentage": 33.5, "elapsed_time": "0:18:21", "remaining_time": "0:36:26", "throughput": 5670.16, "total_tokens": 6244160} +{"current_steps": 12695, "total_steps": 37885, "loss": 0.2284, "lr": 1.6818530909318595e-06, "epoch": 1.6754652237033127, "percentage": 33.51, "elapsed_time": "0:18:21", "remaining_time": "0:36:25", "throughput": 5670.79, "total_tokens": 6246720} +{"current_steps": 12700, "total_steps": 37885, "loss": 0.0043, "lr": 1.6815160229092367e-06, "epoch": 1.676125115481061, "percentage": 33.52, "elapsed_time": "0:18:21", "remaining_time": "0:36:25", "throughput": 5671.26, "total_tokens": 6249088} +{"current_steps": 12705, "total_steps": 37885, "loss": 0.2497, "lr": 1.6811788102397733e-06, "epoch": 1.6767850072588095, "percentage": 33.54, "elapsed_time": "0:18:22", "remaining_time": "0:36:24", "throughput": 5671.73, "total_tokens": 6251456} +{"current_steps": 12710, "total_steps": 37885, "loss": 0.0581, "lr": 1.68084145299504e-06, "epoch": 1.677444899036558, "percentage": 33.55, "elapsed_time": "0:18:22", "remaining_time": "0:36:23", "throughput": 5672.54, "total_tokens": 6254272} +{"current_steps": 12715, "total_steps": 37885, "loss": 0.0853, "lr": 1.6805039512466385e-06, "epoch": 1.6781047908143063, "percentage": 33.56, "elapsed_time": "0:18:22", "remaining_time": "0:36:23", "throughput": 5673.05, "total_tokens": 6256704} +{"current_steps": 12720, "total_steps": 37885, "loss": 0.0476, "lr": 1.6801663050662012e-06, "epoch": 1.678764682592055, "percentage": 33.58, "elapsed_time": "0:18:23", "remaining_time": "0:36:22", "throughput": 5673.52, "total_tokens": 6259072} +{"current_steps": 12725, "total_steps": 37885, "loss": 0.0021, "lr": 1.6798285145253907e-06, "epoch": 1.6794245743698033, "percentage": 33.59, "elapsed_time": "0:18:23", "remaining_time": "0:36:21", "throughput": 5674.13, "total_tokens": 6261632} +{"current_steps": 12730, "total_steps": 37885, "loss": 0.047, "lr": 1.6794905796959017e-06, "epoch": 1.6800844661475518, "percentage": 33.6, "elapsed_time": "0:18:23", "remaining_time": "0:36:21", "throughput": 5674.76, "total_tokens": 6264192} +{"current_steps": 12735, "total_steps": 37885, "loss": 0.0419, "lr": 1.6791525006494572e-06, "epoch": 1.6807443579253003, "percentage": 33.61, "elapsed_time": "0:18:24", "remaining_time": "0:36:20", "throughput": 5675.27, "total_tokens": 6266624} +{"current_steps": 12740, "total_steps": 37885, "loss": 0.3103, "lr": 1.6788142774578126e-06, "epoch": 1.6814042497030486, "percentage": 33.63, "elapsed_time": "0:18:24", "remaining_time": "0:36:20", "throughput": 5675.78, "total_tokens": 6269056} +{"current_steps": 12745, "total_steps": 37885, "loss": 0.0926, "lr": 1.678475910192753e-06, "epoch": 1.6820641414807973, "percentage": 33.64, "elapsed_time": "0:18:24", "remaining_time": "0:36:19", "throughput": 5676.13, "total_tokens": 6271296} +{"current_steps": 12750, "total_steps": 37885, "loss": 0.2126, "lr": 1.6781373989260948e-06, "epoch": 1.6827240332585456, "percentage": 33.65, "elapsed_time": "0:18:25", "remaining_time": "0:36:18", "throughput": 5676.53, "total_tokens": 6273600} +{"current_steps": 12755, "total_steps": 37885, "loss": 0.0928, "lr": 1.6777987437296842e-06, "epoch": 1.683383925036294, "percentage": 33.67, "elapsed_time": "0:18:25", "remaining_time": "0:36:18", "throughput": 5677.14, "total_tokens": 6276160} +{"current_steps": 12760, "total_steps": 37885, "loss": 0.0657, "lr": 1.6774599446753984e-06, "epoch": 1.6840438168140426, "percentage": 33.68, "elapsed_time": "0:18:25", "remaining_time": "0:36:17", "throughput": 5677.77, "total_tokens": 6278720} +{"current_steps": 12765, "total_steps": 37885, "loss": 0.2448, "lr": 1.6771210018351453e-06, "epoch": 1.6847037085917909, "percentage": 33.69, "elapsed_time": "0:18:26", "remaining_time": "0:36:16", "throughput": 5678.29, "total_tokens": 6281152} +{"current_steps": 12770, "total_steps": 37885, "loss": 0.0394, "lr": 1.6767819152808627e-06, "epoch": 1.6853636003695394, "percentage": 33.71, "elapsed_time": "0:18:26", "remaining_time": "0:36:16", "throughput": 5678.65, "total_tokens": 6283392} +{"current_steps": 12775, "total_steps": 37885, "loss": 0.064, "lr": 1.6764426850845194e-06, "epoch": 1.6860234921472879, "percentage": 33.72, "elapsed_time": "0:18:26", "remaining_time": "0:36:15", "throughput": 5678.88, "total_tokens": 6285504} +{"current_steps": 12780, "total_steps": 37885, "loss": 0.306, "lr": 1.676103311318115e-06, "epoch": 1.6866833839250361, "percentage": 33.73, "elapsed_time": "0:18:27", "remaining_time": "0:36:14", "throughput": 5679.38, "total_tokens": 6287936} +{"current_steps": 12785, "total_steps": 37885, "loss": 0.0701, "lr": 1.6757637940536787e-06, "epoch": 1.6873432757027849, "percentage": 33.75, "elapsed_time": "0:18:27", "remaining_time": "0:36:14", "throughput": 5679.99, "total_tokens": 6290496} +{"current_steps": 12790, "total_steps": 37885, "loss": 0.0025, "lr": 1.6754241333632705e-06, "epoch": 1.6880031674805331, "percentage": 33.76, "elapsed_time": "0:18:27", "remaining_time": "0:36:13", "throughput": 5680.6, "total_tokens": 6293056} +{"current_steps": 12795, "total_steps": 37885, "loss": 0.0408, "lr": 1.6750843293189806e-06, "epoch": 1.6886630592582816, "percentage": 33.77, "elapsed_time": "0:18:28", "remaining_time": "0:36:12", "throughput": 5681.1, "total_tokens": 6295488} +{"current_steps": 12800, "total_steps": 37885, "loss": 0.0907, "lr": 1.674744381992931e-06, "epoch": 1.6893229510360301, "percentage": 33.79, "elapsed_time": "0:18:28", "remaining_time": "0:36:12", "throughput": 5681.55, "total_tokens": 6297856} +{"current_steps": 12805, "total_steps": 37885, "loss": 0.18, "lr": 1.674404291457272e-06, "epoch": 1.6899828428137784, "percentage": 33.8, "elapsed_time": "0:18:28", "remaining_time": "0:36:11", "throughput": 5681.95, "total_tokens": 6300160} +{"current_steps": 12810, "total_steps": 37885, "loss": 0.1037, "lr": 1.6740640577841862e-06, "epoch": 1.6906427345915271, "percentage": 33.81, "elapsed_time": "0:18:29", "remaining_time": "0:36:11", "throughput": 5682.77, "total_tokens": 6302976} +{"current_steps": 12815, "total_steps": 37885, "loss": 0.003, "lr": 1.673723681045885e-06, "epoch": 1.6913026263692754, "percentage": 33.83, "elapsed_time": "0:18:29", "remaining_time": "0:36:10", "throughput": 5683.27, "total_tokens": 6305408} +{"current_steps": 12820, "total_steps": 37885, "loss": 0.001, "lr": 1.6733831613146113e-06, "epoch": 1.691962518147024, "percentage": 33.84, "elapsed_time": "0:18:29", "remaining_time": "0:36:09", "throughput": 5684.05, "total_tokens": 6308160} +{"current_steps": 12825, "total_steps": 37885, "loss": 0.101, "lr": 1.673042498662638e-06, "epoch": 1.6926224099247724, "percentage": 33.85, "elapsed_time": "0:18:30", "remaining_time": "0:36:09", "throughput": 5684.62, "total_tokens": 6310656} +{"current_steps": 12830, "total_steps": 37885, "loss": 0.0167, "lr": 1.672701693162268e-06, "epoch": 1.6932823017025207, "percentage": 33.87, "elapsed_time": "0:18:30", "remaining_time": "0:36:08", "throughput": 5685.07, "total_tokens": 6313024} +{"current_steps": 12835, "total_steps": 37885, "loss": 0.0606, "lr": 1.672360744885835e-06, "epoch": 1.6939421934802692, "percentage": 33.88, "elapsed_time": "0:18:30", "remaining_time": "0:36:07", "throughput": 5685.67, "total_tokens": 6315584} +{"current_steps": 12840, "total_steps": 37885, "loss": 0.0008, "lr": 1.6720196539057025e-06, "epoch": 1.6946020852580177, "percentage": 33.89, "elapsed_time": "0:18:31", "remaining_time": "0:36:07", "throughput": 5686.18, "total_tokens": 6318016} +{"current_steps": 12845, "total_steps": 37885, "loss": 0.2314, "lr": 1.671678420294265e-06, "epoch": 1.695261977035766, "percentage": 33.91, "elapsed_time": "0:18:31", "remaining_time": "0:36:06", "throughput": 5687.04, "total_tokens": 6320896} +{"current_steps": 12850, "total_steps": 37885, "loss": 0.0503, "lr": 1.6713370441239469e-06, "epoch": 1.6959218688135147, "percentage": 33.92, "elapsed_time": "0:18:31", "remaining_time": "0:36:06", "throughput": 5687.55, "total_tokens": 6323328} +{"current_steps": 12855, "total_steps": 37885, "loss": 0.1382, "lr": 1.6709955254672026e-06, "epoch": 1.696581760591263, "percentage": 33.93, "elapsed_time": "0:18:32", "remaining_time": "0:36:05", "throughput": 5688.06, "total_tokens": 6325760} +{"current_steps": 12860, "total_steps": 37885, "loss": 0.066, "lr": 1.670653864396517e-06, "epoch": 1.6972416523690115, "percentage": 33.94, "elapsed_time": "0:18:32", "remaining_time": "0:36:04", "throughput": 5688.6, "total_tokens": 6328256} +{"current_steps": 12865, "total_steps": 37885, "loss": 0.1217, "lr": 1.670312060984405e-06, "epoch": 1.69790154414676, "percentage": 33.96, "elapsed_time": "0:18:32", "remaining_time": "0:36:04", "throughput": 5689.21, "total_tokens": 6330816} +{"current_steps": 12870, "total_steps": 37885, "loss": 0.1156, "lr": 1.669970115303412e-06, "epoch": 1.6985614359245083, "percentage": 33.97, "elapsed_time": "0:18:33", "remaining_time": "0:36:03", "throughput": 5689.65, "total_tokens": 6333184} +{"current_steps": 12875, "total_steps": 37885, "loss": 0.0019, "lr": 1.6696280274261137e-06, "epoch": 1.699221327702257, "percentage": 33.98, "elapsed_time": "0:18:33", "remaining_time": "0:36:02", "throughput": 5690.24, "total_tokens": 6335744} +{"current_steps": 12880, "total_steps": 37885, "loss": 0.1754, "lr": 1.6692857974251156e-06, "epoch": 1.6998812194800053, "percentage": 34.0, "elapsed_time": "0:18:33", "remaining_time": "0:36:02", "throughput": 5690.81, "total_tokens": 6338240} +{"current_steps": 12885, "total_steps": 37885, "loss": 0.0014, "lr": 1.668943425373054e-06, "epoch": 1.7005411112577538, "percentage": 34.01, "elapsed_time": "0:18:34", "remaining_time": "0:36:02", "throughput": 5689.98, "total_tokens": 6340672} +{"current_steps": 12890, "total_steps": 37885, "loss": 0.0742, "lr": 1.668600911342594e-06, "epoch": 1.7012010030355023, "percentage": 34.02, "elapsed_time": "0:18:34", "remaining_time": "0:36:01", "throughput": 5690.49, "total_tokens": 6343104} +{"current_steps": 12895, "total_steps": 37885, "loss": 0.0573, "lr": 1.668258255406432e-06, "epoch": 1.7018608948132505, "percentage": 34.04, "elapsed_time": "0:18:35", "remaining_time": "0:36:00", "throughput": 5691.25, "total_tokens": 6345856} +{"current_steps": 12900, "total_steps": 37885, "loss": 0.1826, "lr": 1.6679154576372949e-06, "epoch": 1.702520786590999, "percentage": 34.05, "elapsed_time": "0:18:35", "remaining_time": "0:36:00", "throughput": 5691.67, "total_tokens": 6348224} +{"current_steps": 12905, "total_steps": 37885, "loss": 0.1554, "lr": 1.6675725181079384e-06, "epoch": 1.7031806783687475, "percentage": 34.06, "elapsed_time": "0:18:35", "remaining_time": "0:35:59", "throughput": 5692.27, "total_tokens": 6350784} +{"current_steps": 12910, "total_steps": 37885, "loss": 0.0764, "lr": 1.6672294368911493e-06, "epoch": 1.7038405701464958, "percentage": 34.08, "elapsed_time": "0:18:36", "remaining_time": "0:35:58", "throughput": 5692.86, "total_tokens": 6353344} +{"current_steps": 12915, "total_steps": 37885, "loss": 0.0498, "lr": 1.6668862140597434e-06, "epoch": 1.7045004619242445, "percentage": 34.09, "elapsed_time": "0:18:36", "remaining_time": "0:35:58", "throughput": 5693.2, "total_tokens": 6355584} +{"current_steps": 12920, "total_steps": 37885, "loss": 0.0021, "lr": 1.6665428496865684e-06, "epoch": 1.7051603537019928, "percentage": 34.1, "elapsed_time": "0:18:36", "remaining_time": "0:35:57", "throughput": 5693.84, "total_tokens": 6358208} +{"current_steps": 12925, "total_steps": 37885, "loss": 0.1555, "lr": 1.6661993438445e-06, "epoch": 1.7058202454797413, "percentage": 34.12, "elapsed_time": "0:18:37", "remaining_time": "0:35:57", "throughput": 5694.28, "total_tokens": 6360576} +{"current_steps": 12930, "total_steps": 37885, "loss": 0.0649, "lr": 1.665855696606445e-06, "epoch": 1.7064801372574898, "percentage": 34.13, "elapsed_time": "0:18:37", "remaining_time": "0:35:56", "throughput": 5694.73, "total_tokens": 6362944} +{"current_steps": 12935, "total_steps": 37885, "loss": 0.1484, "lr": 1.6655119080453402e-06, "epoch": 1.707140029035238, "percentage": 34.14, "elapsed_time": "0:18:37", "remaining_time": "0:35:55", "throughput": 5695.29, "total_tokens": 6365440} +{"current_steps": 12940, "total_steps": 37885, "loss": 0.1357, "lr": 1.6651679782341524e-06, "epoch": 1.7077999208129868, "percentage": 34.16, "elapsed_time": "0:18:37", "remaining_time": "0:35:55", "throughput": 5695.75, "total_tokens": 6367808} +{"current_steps": 12945, "total_steps": 37885, "loss": 0.1377, "lr": 1.6648239072458777e-06, "epoch": 1.708459812590735, "percentage": 34.17, "elapsed_time": "0:18:38", "remaining_time": "0:35:54", "throughput": 5696.3, "total_tokens": 6370304} +{"current_steps": 12950, "total_steps": 37885, "loss": 0.0023, "lr": 1.6644796951535432e-06, "epoch": 1.7091197043684836, "percentage": 34.18, "elapsed_time": "0:18:38", "remaining_time": "0:35:53", "throughput": 5696.64, "total_tokens": 6372544} +{"current_steps": 12955, "total_steps": 37885, "loss": 0.0224, "lr": 1.664135342030205e-06, "epoch": 1.709779596146232, "percentage": 34.2, "elapsed_time": "0:18:38", "remaining_time": "0:35:53", "throughput": 5697.36, "total_tokens": 6375232} +{"current_steps": 12960, "total_steps": 37885, "loss": 0.0619, "lr": 1.6637908479489496e-06, "epoch": 1.7104394879239804, "percentage": 34.21, "elapsed_time": "0:18:39", "remaining_time": "0:35:52", "throughput": 5697.86, "total_tokens": 6377664} +{"current_steps": 12965, "total_steps": 37885, "loss": 0.1494, "lr": 1.6634462129828938e-06, "epoch": 1.7110993797017289, "percentage": 34.22, "elapsed_time": "0:18:39", "remaining_time": "0:35:52", "throughput": 5698.29, "total_tokens": 6380032} +{"current_steps": 12970, "total_steps": 37885, "loss": 0.0607, "lr": 1.6631014372051836e-06, "epoch": 1.7117592714794774, "percentage": 34.24, "elapsed_time": "0:18:39", "remaining_time": "0:35:51", "throughput": 5698.78, "total_tokens": 6382464} +{"current_steps": 12975, "total_steps": 37885, "loss": 0.1611, "lr": 1.6627565206889953e-06, "epoch": 1.7124191632572257, "percentage": 34.25, "elapsed_time": "0:18:40", "remaining_time": "0:35:50", "throughput": 5698.95, "total_tokens": 6384512} +{"current_steps": 12980, "total_steps": 37885, "loss": 0.0027, "lr": 1.6624114635075344e-06, "epoch": 1.7130790550349744, "percentage": 34.26, "elapsed_time": "0:18:40", "remaining_time": "0:35:50", "throughput": 5699.56, "total_tokens": 6387072} +{"current_steps": 12985, "total_steps": 37885, "loss": 0.0497, "lr": 1.6620662657340371e-06, "epoch": 1.7137389468127227, "percentage": 34.27, "elapsed_time": "0:18:40", "remaining_time": "0:35:49", "throughput": 5700.21, "total_tokens": 6389696} +{"current_steps": 12990, "total_steps": 37885, "loss": 0.0436, "lr": 1.66172092744177e-06, "epoch": 1.7143988385904712, "percentage": 34.29, "elapsed_time": "0:18:41", "remaining_time": "0:35:48", "throughput": 5700.67, "total_tokens": 6392064} +{"current_steps": 12995, "total_steps": 37885, "loss": 0.0485, "lr": 1.661375448704027e-06, "epoch": 1.7150587303682197, "percentage": 34.3, "elapsed_time": "0:18:41", "remaining_time": "0:35:48", "throughput": 5701.42, "total_tokens": 6394816} +{"current_steps": 13000, "total_steps": 37885, "loss": 0.1353, "lr": 1.6610298295941347e-06, "epoch": 1.715718622145968, "percentage": 34.31, "elapsed_time": "0:18:41", "remaining_time": "0:35:47", "throughput": 5702.02, "total_tokens": 6397376} +{"current_steps": 13005, "total_steps": 37885, "loss": 0.1308, "lr": 1.6606840701854476e-06, "epoch": 1.7163785139237167, "percentage": 34.33, "elapsed_time": "0:18:42", "remaining_time": "0:35:47", "throughput": 5702.61, "total_tokens": 6399936} +{"current_steps": 13010, "total_steps": 37885, "loss": 0.1169, "lr": 1.660338170551351e-06, "epoch": 1.717038405701465, "percentage": 34.34, "elapsed_time": "0:18:42", "remaining_time": "0:35:46", "throughput": 5703.25, "total_tokens": 6402560} +{"current_steps": 13015, "total_steps": 37885, "loss": 0.081, "lr": 1.6599921307652598e-06, "epoch": 1.7176982974792134, "percentage": 34.35, "elapsed_time": "0:18:42", "remaining_time": "0:35:45", "throughput": 5703.69, "total_tokens": 6404928} +{"current_steps": 13020, "total_steps": 37885, "loss": 0.0065, "lr": 1.659645950900618e-06, "epoch": 1.718358189256962, "percentage": 34.37, "elapsed_time": "0:18:43", "remaining_time": "0:35:45", "throughput": 5704.35, "total_tokens": 6407552} +{"current_steps": 13025, "total_steps": 37885, "loss": 0.1286, "lr": 1.6592996310308997e-06, "epoch": 1.7190180810347102, "percentage": 34.38, "elapsed_time": "0:18:43", "remaining_time": "0:35:44", "throughput": 5705.04, "total_tokens": 6410240} +{"current_steps": 13030, "total_steps": 37885, "loss": 0.1372, "lr": 1.658953171229609e-06, "epoch": 1.7196779728124587, "percentage": 34.39, "elapsed_time": "0:18:43", "remaining_time": "0:35:43", "throughput": 5705.37, "total_tokens": 6412480} +{"current_steps": 13035, "total_steps": 37885, "loss": 0.0805, "lr": 1.6586065715702797e-06, "epoch": 1.7203378645902072, "percentage": 34.41, "elapsed_time": "0:18:44", "remaining_time": "0:35:43", "throughput": 5706.01, "total_tokens": 6415104} +{"current_steps": 13040, "total_steps": 37885, "loss": 0.04, "lr": 1.658259832126475e-06, "epoch": 1.7209977563679555, "percentage": 34.42, "elapsed_time": "0:18:44", "remaining_time": "0:35:42", "throughput": 5706.66, "total_tokens": 6417728} +{"current_steps": 13045, "total_steps": 37885, "loss": 0.1079, "lr": 1.6579129529717872e-06, "epoch": 1.7216576481457042, "percentage": 34.43, "elapsed_time": "0:18:44", "remaining_time": "0:35:42", "throughput": 5707.45, "total_tokens": 6420544} +{"current_steps": 13050, "total_steps": 37885, "loss": 0.0819, "lr": 1.6575659341798396e-06, "epoch": 1.7223175399234525, "percentage": 34.45, "elapsed_time": "0:18:45", "remaining_time": "0:35:41", "throughput": 5707.95, "total_tokens": 6422976} +{"current_steps": 13055, "total_steps": 37885, "loss": 0.1217, "lr": 1.6572187758242842e-06, "epoch": 1.722977431701201, "percentage": 34.46, "elapsed_time": "0:18:45", "remaining_time": "0:35:40", "throughput": 5708.27, "total_tokens": 6425216} +{"current_steps": 13060, "total_steps": 37885, "loss": 0.3001, "lr": 1.6568714779788024e-06, "epoch": 1.7236373234789495, "percentage": 34.47, "elapsed_time": "0:18:45", "remaining_time": "0:35:40", "throughput": 5708.58, "total_tokens": 6427456} +{"current_steps": 13065, "total_steps": 37885, "loss": 0.0018, "lr": 1.6565240407171067e-06, "epoch": 1.7242972152566978, "percentage": 34.49, "elapsed_time": "0:18:46", "remaining_time": "0:35:39", "throughput": 5709.03, "total_tokens": 6429824} +{"current_steps": 13070, "total_steps": 37885, "loss": 0.1615, "lr": 1.6561764641129371e-06, "epoch": 1.7249571070344465, "percentage": 34.5, "elapsed_time": "0:18:46", "remaining_time": "0:35:38", "throughput": 5709.41, "total_tokens": 6432128} +{"current_steps": 13075, "total_steps": 37885, "loss": 0.1621, "lr": 1.655828748240065e-06, "epoch": 1.7256169988121948, "percentage": 34.51, "elapsed_time": "0:18:46", "remaining_time": "0:35:38", "throughput": 5709.9, "total_tokens": 6434560} +{"current_steps": 13080, "total_steps": 37885, "loss": 0.0018, "lr": 1.6554808931722902e-06, "epoch": 1.7262768905899433, "percentage": 34.53, "elapsed_time": "0:18:47", "remaining_time": "0:35:37", "throughput": 5710.5, "total_tokens": 6437120} +{"current_steps": 13085, "total_steps": 37885, "loss": 0.113, "lr": 1.6551328989834423e-06, "epoch": 1.7269367823676918, "percentage": 34.54, "elapsed_time": "0:18:47", "remaining_time": "0:35:37", "throughput": 5711.04, "total_tokens": 6439616} +{"current_steps": 13090, "total_steps": 37885, "loss": 0.0015, "lr": 1.6547847657473805e-06, "epoch": 1.72759667414544, "percentage": 34.55, "elapsed_time": "0:18:47", "remaining_time": "0:35:36", "throughput": 5711.67, "total_tokens": 6442240} +{"current_steps": 13095, "total_steps": 37885, "loss": 0.0015, "lr": 1.654436493537994e-06, "epoch": 1.7282565659231885, "percentage": 34.57, "elapsed_time": "0:18:48", "remaining_time": "0:35:35", "throughput": 5712.3, "total_tokens": 6444864} +{"current_steps": 13100, "total_steps": 37885, "loss": 0.0301, "lr": 1.6540880824292008e-06, "epoch": 1.728916457700937, "percentage": 34.58, "elapsed_time": "0:18:48", "remaining_time": "0:35:35", "throughput": 5712.8, "total_tokens": 6447296} +{"current_steps": 13105, "total_steps": 37885, "loss": 0.1013, "lr": 1.6537395324949489e-06, "epoch": 1.7295763494786855, "percentage": 34.59, "elapsed_time": "0:18:48", "remaining_time": "0:35:34", "throughput": 5713.02, "total_tokens": 6449408} +{"current_steps": 13110, "total_steps": 37885, "loss": 0.0005, "lr": 1.6533908438092149e-06, "epoch": 1.730236241256434, "percentage": 34.6, "elapsed_time": "0:18:49", "remaining_time": "0:35:33", "throughput": 5713.61, "total_tokens": 6451968} +{"current_steps": 13115, "total_steps": 37885, "loss": 0.0571, "lr": 1.6530420164460055e-06, "epoch": 1.7308961330341823, "percentage": 34.62, "elapsed_time": "0:18:49", "remaining_time": "0:35:33", "throughput": 5714.01, "total_tokens": 6454272} +{"current_steps": 13120, "total_steps": 37885, "loss": 0.0762, "lr": 1.6526930504793576e-06, "epoch": 1.7315560248119308, "percentage": 34.63, "elapsed_time": "0:18:49", "remaining_time": "0:35:32", "throughput": 5714.44, "total_tokens": 6456640} +{"current_steps": 13125, "total_steps": 37885, "loss": 0.001, "lr": 1.6523439459833357e-06, "epoch": 1.7322159165896793, "percentage": 34.64, "elapsed_time": "0:18:50", "remaining_time": "0:35:32", "throughput": 5714.98, "total_tokens": 6459136} +{"current_steps": 13130, "total_steps": 37885, "loss": 0.0997, "lr": 1.6519947030320356e-06, "epoch": 1.7328758083674276, "percentage": 34.66, "elapsed_time": "0:18:50", "remaining_time": "0:35:31", "throughput": 5715.31, "total_tokens": 6461376} +{"current_steps": 13135, "total_steps": 37885, "loss": 0.0785, "lr": 1.651645321699581e-06, "epoch": 1.7335357001451763, "percentage": 34.67, "elapsed_time": "0:18:50", "remaining_time": "0:35:30", "throughput": 5715.9, "total_tokens": 6463936} +{"current_steps": 13140, "total_steps": 37885, "loss": 0.0607, "lr": 1.6512958020601256e-06, "epoch": 1.7341955919229246, "percentage": 34.68, "elapsed_time": "0:18:51", "remaining_time": "0:35:30", "throughput": 5716.46, "total_tokens": 6466432} +{"current_steps": 13145, "total_steps": 37885, "loss": 0.0482, "lr": 1.6509461441878527e-06, "epoch": 1.734855483700673, "percentage": 34.7, "elapsed_time": "0:18:51", "remaining_time": "0:35:29", "throughput": 5716.9, "total_tokens": 6468800} +{"current_steps": 13150, "total_steps": 37885, "loss": 0.0163, "lr": 1.6505963481569745e-06, "epoch": 1.7355153754784216, "percentage": 34.71, "elapsed_time": "0:18:51", "remaining_time": "0:35:28", "throughput": 5717.5, "total_tokens": 6471360} +{"current_steps": 13155, "total_steps": 37885, "loss": 0.0005, "lr": 1.6502464140417326e-06, "epoch": 1.7361752672561699, "percentage": 34.72, "elapsed_time": "0:18:52", "remaining_time": "0:35:28", "throughput": 5718.04, "total_tokens": 6473856} +{"current_steps": 13160, "total_steps": 37885, "loss": 0.2147, "lr": 1.6498963419163978e-06, "epoch": 1.7368351590339184, "percentage": 34.74, "elapsed_time": "0:18:52", "remaining_time": "0:35:27", "throughput": 5718.53, "total_tokens": 6476288} +{"current_steps": 13165, "total_steps": 37885, "loss": 0.1285, "lr": 1.6495461318552708e-06, "epoch": 1.7374950508116669, "percentage": 34.75, "elapsed_time": "0:18:52", "remaining_time": "0:35:27", "throughput": 5718.87, "total_tokens": 6478528} +{"current_steps": 13170, "total_steps": 37885, "loss": 0.0915, "lr": 1.6491957839326812e-06, "epoch": 1.7381549425894154, "percentage": 34.76, "elapsed_time": "0:18:53", "remaining_time": "0:35:26", "throughput": 5719.42, "total_tokens": 6481024} +{"current_steps": 13175, "total_steps": 37885, "loss": 0.0941, "lr": 1.6488452982229873e-06, "epoch": 1.7388148343671639, "percentage": 34.78, "elapsed_time": "0:18:53", "remaining_time": "0:35:25", "throughput": 5719.63, "total_tokens": 6483136} +{"current_steps": 13180, "total_steps": 37885, "loss": 0.127, "lr": 1.6484946748005773e-06, "epoch": 1.7394747261449122, "percentage": 34.79, "elapsed_time": "0:18:53", "remaining_time": "0:35:25", "throughput": 5720.33, "total_tokens": 6485824} +{"current_steps": 13185, "total_steps": 37885, "loss": 0.0833, "lr": 1.6481439137398688e-06, "epoch": 1.7401346179226607, "percentage": 34.8, "elapsed_time": "0:18:54", "remaining_time": "0:35:24", "throughput": 5720.9, "total_tokens": 6488384} +{"current_steps": 13190, "total_steps": 37885, "loss": 0.0005, "lr": 1.6477930151153078e-06, "epoch": 1.7407945097004092, "percentage": 34.82, "elapsed_time": "0:18:54", "remaining_time": "0:35:24", "throughput": 5721.59, "total_tokens": 6491072} +{"current_steps": 13195, "total_steps": 37885, "loss": 0.0576, "lr": 1.6474419790013707e-06, "epoch": 1.7414544014781574, "percentage": 34.83, "elapsed_time": "0:18:54", "remaining_time": "0:35:23", "throughput": 5722.13, "total_tokens": 6493568} +{"current_steps": 13200, "total_steps": 37885, "loss": 0.0509, "lr": 1.6470908054725617e-06, "epoch": 1.7421142932559062, "percentage": 34.84, "elapsed_time": "0:18:55", "remaining_time": "0:35:22", "throughput": 5722.87, "total_tokens": 6496320} +{"current_steps": 13205, "total_steps": 37885, "loss": 0.0011, "lr": 1.6467394946034152e-06, "epoch": 1.7427741850336544, "percentage": 34.86, "elapsed_time": "0:18:55", "remaining_time": "0:35:22", "throughput": 5723.19, "total_tokens": 6498560} +{"current_steps": 13210, "total_steps": 37885, "loss": 0.0763, "lr": 1.6463880464684942e-06, "epoch": 1.743434076811403, "percentage": 34.87, "elapsed_time": "0:18:55", "remaining_time": "0:35:21", "throughput": 5723.62, "total_tokens": 6500928} +{"current_steps": 13215, "total_steps": 37885, "loss": 0.069, "lr": 1.6460364611423911e-06, "epoch": 1.7440939685891514, "percentage": 34.88, "elapsed_time": "0:18:56", "remaining_time": "0:35:20", "throughput": 5724.15, "total_tokens": 6503424} +{"current_steps": 13220, "total_steps": 37885, "loss": 0.1996, "lr": 1.6456847386997277e-06, "epoch": 1.7447538603668997, "percentage": 34.9, "elapsed_time": "0:18:56", "remaining_time": "0:35:20", "throughput": 5724.57, "total_tokens": 6505792} +{"current_steps": 13225, "total_steps": 37885, "loss": 0.0445, "lr": 1.6453328792151537e-06, "epoch": 1.7454137521446482, "percentage": 34.91, "elapsed_time": "0:18:56", "remaining_time": "0:35:19", "throughput": 5725.0, "total_tokens": 6508160} +{"current_steps": 13230, "total_steps": 37885, "loss": 0.0011, "lr": 1.6449808827633497e-06, "epoch": 1.7460736439223967, "percentage": 34.92, "elapsed_time": "0:18:57", "remaining_time": "0:35:19", "throughput": 5725.8, "total_tokens": 6510976} +{"current_steps": 13235, "total_steps": 37885, "loss": 0.0272, "lr": 1.6446287494190237e-06, "epoch": 1.7467335357001452, "percentage": 34.93, "elapsed_time": "0:18:57", "remaining_time": "0:35:18", "throughput": 5726.13, "total_tokens": 6513216} +{"current_steps": 13240, "total_steps": 37885, "loss": 0.0498, "lr": 1.6442764792569136e-06, "epoch": 1.7473934274778937, "percentage": 34.95, "elapsed_time": "0:18:57", "remaining_time": "0:35:17", "throughput": 5726.82, "total_tokens": 6515904} +{"current_steps": 13245, "total_steps": 37885, "loss": 0.1935, "lr": 1.6439240723517862e-06, "epoch": 1.748053319255642, "percentage": 34.96, "elapsed_time": "0:18:58", "remaining_time": "0:35:17", "throughput": 5727.47, "total_tokens": 6518528} +{"current_steps": 13250, "total_steps": 37885, "loss": 0.0038, "lr": 1.6435715287784375e-06, "epoch": 1.7487132110333905, "percentage": 34.97, "elapsed_time": "0:18:58", "remaining_time": "0:35:16", "throughput": 5727.96, "total_tokens": 6520960} +{"current_steps": 13255, "total_steps": 37885, "loss": 0.0558, "lr": 1.643218848611692e-06, "epoch": 1.749373102811139, "percentage": 34.99, "elapsed_time": "0:18:58", "remaining_time": "0:35:16", "throughput": 5728.55, "total_tokens": 6523520} +{"current_steps": 13260, "total_steps": 37885, "loss": 0.2054, "lr": 1.642866031926404e-06, "epoch": 1.7500329945888873, "percentage": 35.0, "elapsed_time": "0:18:59", "remaining_time": "0:35:15", "throughput": 5729.14, "total_tokens": 6526080} +{"current_steps": 13265, "total_steps": 37885, "loss": 0.342, "lr": 1.6425130787974558e-06, "epoch": 1.750692886366636, "percentage": 35.01, "elapsed_time": "0:18:59", "remaining_time": "0:35:14", "throughput": 5729.57, "total_tokens": 6528448} +{"current_steps": 13265, "total_steps": 37885, "eval_loss": 0.11515690386295319, "epoch": 1.750692886366636, "percentage": 35.01, "elapsed_time": "0:19:07", "remaining_time": "0:35:29", "throughput": 5690.44, "total_tokens": 6528448} +{"current_steps": 13270, "total_steps": 37885, "loss": 0.0822, "lr": 1.6421599892997596e-06, "epoch": 1.7513527781443843, "percentage": 35.03, "elapsed_time": "0:19:44", "remaining_time": "0:36:38", "throughput": 5511.56, "total_tokens": 6531136} +{"current_steps": 13275, "total_steps": 37885, "loss": 0.2525, "lr": 1.6418067635082555e-06, "epoch": 1.7520126699221328, "percentage": 35.04, "elapsed_time": "0:19:45", "remaining_time": "0:36:37", "throughput": 5512.27, "total_tokens": 6533824} +{"current_steps": 13280, "total_steps": 37885, "loss": 0.1643, "lr": 1.6414534014979138e-06, "epoch": 1.7526725616998813, "percentage": 35.05, "elapsed_time": "0:19:45", "remaining_time": "0:36:36", "throughput": 5512.78, "total_tokens": 6536256} +{"current_steps": 13285, "total_steps": 37885, "loss": 0.0185, "lr": 1.6410999033437323e-06, "epoch": 1.7533324534776296, "percentage": 35.07, "elapsed_time": "0:19:45", "remaining_time": "0:36:36", "throughput": 5513.29, "total_tokens": 6538688} +{"current_steps": 13290, "total_steps": 37885, "loss": 0.0024, "lr": 1.640746269120739e-06, "epoch": 1.7539923452553783, "percentage": 35.08, "elapsed_time": "0:19:46", "remaining_time": "0:36:35", "throughput": 5514.0, "total_tokens": 6541376} +{"current_steps": 13295, "total_steps": 37885, "loss": 0.001, "lr": 1.6403924989039899e-06, "epoch": 1.7546522370331266, "percentage": 35.09, "elapsed_time": "0:19:46", "remaining_time": "0:36:34", "throughput": 5514.45, "total_tokens": 6543744} +{"current_steps": 13300, "total_steps": 37885, "loss": 0.0786, "lr": 1.6400385927685706e-06, "epoch": 1.755312128810875, "percentage": 35.11, "elapsed_time": "0:19:46", "remaining_time": "0:36:34", "throughput": 5514.91, "total_tokens": 6546112} +{"current_steps": 13305, "total_steps": 37885, "loss": 0.0008, "lr": 1.6396845507895942e-06, "epoch": 1.7559720205886236, "percentage": 35.12, "elapsed_time": "0:19:47", "remaining_time": "0:36:33", "throughput": 5515.43, "total_tokens": 6548544} +{"current_steps": 13310, "total_steps": 37885, "loss": 0.0397, "lr": 1.6393303730422046e-06, "epoch": 1.7566319123663718, "percentage": 35.13, "elapsed_time": "0:19:47", "remaining_time": "0:36:32", "throughput": 5515.94, "total_tokens": 6550976} +{"current_steps": 13315, "total_steps": 37885, "loss": 0.027, "lr": 1.6389760596015727e-06, "epoch": 1.7572918041441203, "percentage": 35.15, "elapsed_time": "0:19:47", "remaining_time": "0:36:32", "throughput": 5516.56, "total_tokens": 6553536} +{"current_steps": 13320, "total_steps": 37885, "loss": 0.0348, "lr": 1.6386216105428993e-06, "epoch": 1.7579516959218688, "percentage": 35.16, "elapsed_time": "0:19:48", "remaining_time": "0:36:31", "throughput": 5517.22, "total_tokens": 6556160} +{"current_steps": 13325, "total_steps": 37885, "loss": 0.0428, "lr": 1.6382670259414138e-06, "epoch": 1.7586115876996171, "percentage": 35.17, "elapsed_time": "0:19:48", "remaining_time": "0:36:30", "throughput": 5517.53, "total_tokens": 6558336} +{"current_steps": 13330, "total_steps": 37885, "loss": 0.1086, "lr": 1.637912305872374e-06, "epoch": 1.7592714794773658, "percentage": 35.19, "elapsed_time": "0:19:48", "remaining_time": "0:36:30", "throughput": 5518.17, "total_tokens": 6560960} +{"current_steps": 13335, "total_steps": 37885, "loss": 0.0529, "lr": 1.6375574504110664e-06, "epoch": 1.7599313712551141, "percentage": 35.2, "elapsed_time": "0:19:49", "remaining_time": "0:36:29", "throughput": 5518.53, "total_tokens": 6563200} +{"current_steps": 13340, "total_steps": 37885, "loss": 0.0012, "lr": 1.637202459632807e-06, "epoch": 1.7605912630328626, "percentage": 35.21, "elapsed_time": "0:19:49", "remaining_time": "0:36:28", "throughput": 5519.42, "total_tokens": 6566144} +{"current_steps": 13345, "total_steps": 37885, "loss": 0.1122, "lr": 1.6368473336129395e-06, "epoch": 1.7612511548106111, "percentage": 35.23, "elapsed_time": "0:19:49", "remaining_time": "0:36:28", "throughput": 5519.89, "total_tokens": 6568512} +{"current_steps": 13350, "total_steps": 37885, "loss": 0.1335, "lr": 1.6364920724268374e-06, "epoch": 1.7619110465883594, "percentage": 35.24, "elapsed_time": "0:19:50", "remaining_time": "0:36:27", "throughput": 5520.2, "total_tokens": 6570688} +{"current_steps": 13355, "total_steps": 37885, "loss": 0.2444, "lr": 1.6361366761499023e-06, "epoch": 1.7625709383661081, "percentage": 35.25, "elapsed_time": "0:19:50", "remaining_time": "0:36:26", "throughput": 5520.91, "total_tokens": 6573376} +{"current_steps": 13360, "total_steps": 37885, "loss": 0.0877, "lr": 1.6357811448575638e-06, "epoch": 1.7632308301438564, "percentage": 35.26, "elapsed_time": "0:19:50", "remaining_time": "0:36:26", "throughput": 5521.15, "total_tokens": 6575488} +{"current_steps": 13365, "total_steps": 37885, "loss": 0.0697, "lr": 1.6354254786252813e-06, "epoch": 1.763890721921605, "percentage": 35.28, "elapsed_time": "0:19:51", "remaining_time": "0:36:25", "throughput": 5521.62, "total_tokens": 6577856} +{"current_steps": 13370, "total_steps": 37885, "loss": 0.0228, "lr": 1.6350696775285425e-06, "epoch": 1.7645506136993534, "percentage": 35.29, "elapsed_time": "0:19:51", "remaining_time": "0:36:24", "throughput": 5521.97, "total_tokens": 6580096} +{"current_steps": 13375, "total_steps": 37885, "loss": 0.0148, "lr": 1.6347137416428637e-06, "epoch": 1.7652105054771017, "percentage": 35.3, "elapsed_time": "0:19:51", "remaining_time": "0:36:24", "throughput": 5522.52, "total_tokens": 6582592} +{"current_steps": 13380, "total_steps": 37885, "loss": 0.1398, "lr": 1.634357671043789e-06, "epoch": 1.7658703972548502, "percentage": 35.32, "elapsed_time": "0:19:52", "remaining_time": "0:36:23", "throughput": 5523.19, "total_tokens": 6585216} +{"current_steps": 13385, "total_steps": 37885, "loss": 0.0012, "lr": 1.6340014658068923e-06, "epoch": 1.7665302890325987, "percentage": 35.33, "elapsed_time": "0:19:52", "remaining_time": "0:36:22", "throughput": 5523.98, "total_tokens": 6588032} +{"current_steps": 13390, "total_steps": 37885, "loss": 0.0724, "lr": 1.6336451260077757e-06, "epoch": 1.767190180810347, "percentage": 35.34, "elapsed_time": "0:19:52", "remaining_time": "0:36:22", "throughput": 5524.54, "total_tokens": 6590528} +{"current_steps": 13395, "total_steps": 37885, "loss": 0.1463, "lr": 1.6332886517220694e-06, "epoch": 1.7678500725880957, "percentage": 35.36, "elapsed_time": "0:19:53", "remaining_time": "0:36:21", "throughput": 5525.03, "total_tokens": 6592960} +{"current_steps": 13400, "total_steps": 37885, "loss": 0.0916, "lr": 1.632932043025433e-06, "epoch": 1.768509964365844, "percentage": 35.37, "elapsed_time": "0:19:53", "remaining_time": "0:36:21", "throughput": 5525.33, "total_tokens": 6595136} +{"current_steps": 13405, "total_steps": 37885, "loss": 0.2149, "lr": 1.6325752999935539e-06, "epoch": 1.7691698561435925, "percentage": 35.38, "elapsed_time": "0:19:53", "remaining_time": "0:36:20", "throughput": 5526.03, "total_tokens": 6597824} +{"current_steps": 13410, "total_steps": 37885, "loss": 0.0036, "lr": 1.6322184227021479e-06, "epoch": 1.769829747921341, "percentage": 35.4, "elapsed_time": "0:19:54", "remaining_time": "0:36:19", "throughput": 5526.45, "total_tokens": 6600128} +{"current_steps": 13415, "total_steps": 37885, "loss": 0.0014, "lr": 1.6318614112269598e-06, "epoch": 1.7704896396990892, "percentage": 35.41, "elapsed_time": "0:19:54", "remaining_time": "0:36:19", "throughput": 5527.26, "total_tokens": 6602944} +{"current_steps": 13420, "total_steps": 37885, "loss": 0.1428, "lr": 1.631504265643763e-06, "epoch": 1.771149531476838, "percentage": 35.42, "elapsed_time": "0:19:54", "remaining_time": "0:36:18", "throughput": 5527.62, "total_tokens": 6605184} +{"current_steps": 13425, "total_steps": 37885, "loss": 0.0775, "lr": 1.6311469860283584e-06, "epoch": 1.7718094232545862, "percentage": 35.44, "elapsed_time": "0:19:55", "remaining_time": "0:36:17", "throughput": 5528.19, "total_tokens": 6607680} +{"current_steps": 13430, "total_steps": 37885, "loss": 0.0653, "lr": 1.6307895724565768e-06, "epoch": 1.7724693150323347, "percentage": 35.45, "elapsed_time": "0:19:55", "remaining_time": "0:36:17", "throughput": 5528.64, "total_tokens": 6610048} +{"current_steps": 13435, "total_steps": 37885, "loss": 0.0665, "lr": 1.6304320250042761e-06, "epoch": 1.7731292068100832, "percentage": 35.46, "elapsed_time": "0:19:55", "remaining_time": "0:36:16", "throughput": 5529.16, "total_tokens": 6612480} +{"current_steps": 13440, "total_steps": 37885, "loss": 0.0553, "lr": 1.6300743437473434e-06, "epoch": 1.7737890985878315, "percentage": 35.48, "elapsed_time": "0:19:56", "remaining_time": "0:36:15", "throughput": 5529.57, "total_tokens": 6614784} +{"current_steps": 13445, "total_steps": 37885, "loss": 0.0466, "lr": 1.6297165287616936e-06, "epoch": 1.77444899036558, "percentage": 35.49, "elapsed_time": "0:19:56", "remaining_time": "0:36:15", "throughput": 5530.03, "total_tokens": 6617152} +{"current_steps": 13450, "total_steps": 37885, "loss": 0.2752, "lr": 1.629358580123271e-06, "epoch": 1.7751088821433285, "percentage": 35.5, "elapsed_time": "0:19:56", "remaining_time": "0:36:14", "throughput": 5530.56, "total_tokens": 6619648} +{"current_steps": 13455, "total_steps": 37885, "loss": 0.0518, "lr": 1.6290004979080473e-06, "epoch": 1.7757687739210768, "percentage": 35.52, "elapsed_time": "0:19:57", "remaining_time": "0:36:13", "throughput": 5531.02, "total_tokens": 6622016} +{"current_steps": 13460, "total_steps": 37885, "loss": 0.1611, "lr": 1.6286422821920222e-06, "epoch": 1.7764286656988255, "percentage": 35.53, "elapsed_time": "0:19:57", "remaining_time": "0:36:13", "throughput": 5531.46, "total_tokens": 6624384} +{"current_steps": 13465, "total_steps": 37885, "loss": 0.1648, "lr": 1.6282839330512252e-06, "epoch": 1.7770885574765738, "percentage": 35.54, "elapsed_time": "0:19:57", "remaining_time": "0:36:12", "throughput": 5532.03, "total_tokens": 6626880} +{"current_steps": 13470, "total_steps": 37885, "loss": 0.0707, "lr": 1.6279254505617134e-06, "epoch": 1.7777484492543223, "percentage": 35.55, "elapsed_time": "0:19:58", "remaining_time": "0:36:11", "throughput": 5532.88, "total_tokens": 6629760} +{"current_steps": 13475, "total_steps": 37885, "loss": 0.0006, "lr": 1.6275668347995714e-06, "epoch": 1.7784083410320708, "percentage": 35.57, "elapsed_time": "0:19:58", "remaining_time": "0:36:11", "throughput": 5533.67, "total_tokens": 6632576} +{"current_steps": 13480, "total_steps": 37885, "loss": 0.0429, "lr": 1.6272080858409138e-06, "epoch": 1.779068232809819, "percentage": 35.58, "elapsed_time": "0:19:58", "remaining_time": "0:36:10", "throughput": 5534.28, "total_tokens": 6635136} +{"current_steps": 13485, "total_steps": 37885, "loss": 0.0012, "lr": 1.6268492037618815e-06, "epoch": 1.7797281245875678, "percentage": 35.59, "elapsed_time": "0:19:59", "remaining_time": "0:36:09", "throughput": 5534.68, "total_tokens": 6637440} +{"current_steps": 13490, "total_steps": 37885, "loss": 0.0592, "lr": 1.6264901886386448e-06, "epoch": 1.780388016365316, "percentage": 35.61, "elapsed_time": "0:19:59", "remaining_time": "0:36:09", "throughput": 5535.53, "total_tokens": 6640320} +{"current_steps": 13495, "total_steps": 37885, "loss": 0.1364, "lr": 1.6261310405474022e-06, "epoch": 1.7810479081430646, "percentage": 35.62, "elapsed_time": "0:19:59", "remaining_time": "0:36:08", "throughput": 5536.32, "total_tokens": 6643136} +{"current_steps": 13500, "total_steps": 37885, "loss": 0.0181, "lr": 1.6257717595643807e-06, "epoch": 1.781707799920813, "percentage": 35.63, "elapsed_time": "0:20:00", "remaining_time": "0:36:07", "throughput": 5536.85, "total_tokens": 6645568} +{"current_steps": 13505, "total_steps": 37885, "loss": 0.1195, "lr": 1.6254123457658346e-06, "epoch": 1.7823676916985614, "percentage": 35.65, "elapsed_time": "0:20:00", "remaining_time": "0:36:07", "throughput": 5537.47, "total_tokens": 6648128} +{"current_steps": 13510, "total_steps": 37885, "loss": 0.0976, "lr": 1.625052799228047e-06, "epoch": 1.7830275834763099, "percentage": 35.66, "elapsed_time": "0:20:00", "remaining_time": "0:36:06", "throughput": 5538.23, "total_tokens": 6650880} +{"current_steps": 13515, "total_steps": 37885, "loss": 0.0987, "lr": 1.624693120027329e-06, "epoch": 1.7836874752540584, "percentage": 35.67, "elapsed_time": "0:20:01", "remaining_time": "0:36:06", "throughput": 5538.86, "total_tokens": 6653504} +{"current_steps": 13520, "total_steps": 37885, "loss": 0.0598, "lr": 1.6243333082400197e-06, "epoch": 1.7843473670318066, "percentage": 35.69, "elapsed_time": "0:20:01", "remaining_time": "0:36:05", "throughput": 5539.37, "total_tokens": 6655936} +{"current_steps": 13525, "total_steps": 37885, "loss": 0.0493, "lr": 1.623973363942487e-06, "epoch": 1.7850072588095554, "percentage": 35.7, "elapsed_time": "0:20:01", "remaining_time": "0:36:04", "throughput": 5539.89, "total_tokens": 6658432} +{"current_steps": 13530, "total_steps": 37885, "loss": 0.0007, "lr": 1.6236132872111266e-06, "epoch": 1.7856671505873036, "percentage": 35.71, "elapsed_time": "0:20:02", "remaining_time": "0:36:04", "throughput": 5540.3, "total_tokens": 6660800} +{"current_steps": 13535, "total_steps": 37885, "loss": 0.0938, "lr": 1.6232530781223613e-06, "epoch": 1.7863270423650521, "percentage": 35.73, "elapsed_time": "0:20:02", "remaining_time": "0:36:03", "throughput": 5540.77, "total_tokens": 6663232} +{"current_steps": 13540, "total_steps": 37885, "loss": 0.1012, "lr": 1.6228927367526437e-06, "epoch": 1.7869869341428006, "percentage": 35.74, "elapsed_time": "0:20:02", "remaining_time": "0:36:02", "throughput": 5541.34, "total_tokens": 6665792} +{"current_steps": 13545, "total_steps": 37885, "loss": 0.0525, "lr": 1.6225322631784533e-06, "epoch": 1.787646825920549, "percentage": 35.75, "elapsed_time": "0:20:03", "remaining_time": "0:36:02", "throughput": 5541.88, "total_tokens": 6668352} +{"current_steps": 13550, "total_steps": 37885, "loss": 0.0015, "lr": 1.622171657476298e-06, "epoch": 1.7883067176982976, "percentage": 35.77, "elapsed_time": "0:20:03", "remaining_time": "0:36:01", "throughput": 5542.48, "total_tokens": 6670976} +{"current_steps": 13555, "total_steps": 37885, "loss": 0.0724, "lr": 1.621810919722714e-06, "epoch": 1.788966609476046, "percentage": 35.78, "elapsed_time": "0:20:03", "remaining_time": "0:36:01", "throughput": 5542.91, "total_tokens": 6673472} +{"current_steps": 13560, "total_steps": 37885, "loss": 0.0431, "lr": 1.6214500499942649e-06, "epoch": 1.7896265012537944, "percentage": 35.79, "elapsed_time": "0:20:04", "remaining_time": "0:36:00", "throughput": 5543.35, "total_tokens": 6675904} +{"current_steps": 13565, "total_steps": 37885, "loss": 0.1878, "lr": 1.6210890483675427e-06, "epoch": 1.790286393031543, "percentage": 35.81, "elapsed_time": "0:20:04", "remaining_time": "0:35:59", "throughput": 5543.68, "total_tokens": 6678208} +{"current_steps": 13570, "total_steps": 37885, "loss": 0.0459, "lr": 1.620727914919168e-06, "epoch": 1.7909462848092912, "percentage": 35.82, "elapsed_time": "0:20:04", "remaining_time": "0:35:59", "throughput": 5544.19, "total_tokens": 6680704} +{"current_steps": 13575, "total_steps": 37885, "loss": 0.0964, "lr": 1.620366649725788e-06, "epoch": 1.7916061765870397, "percentage": 35.83, "elapsed_time": "0:20:05", "remaining_time": "0:35:58", "throughput": 5544.64, "total_tokens": 6683136} +{"current_steps": 13580, "total_steps": 37885, "loss": 0.0529, "lr": 1.6200052528640792e-06, "epoch": 1.7922660683647882, "percentage": 35.85, "elapsed_time": "0:20:05", "remaining_time": "0:35:57", "throughput": 5545.14, "total_tokens": 6685632} +{"current_steps": 13585, "total_steps": 37885, "loss": 0.1235, "lr": 1.619643724410745e-06, "epoch": 1.7929259601425365, "percentage": 35.86, "elapsed_time": "0:20:06", "remaining_time": "0:35:57", "throughput": 5545.44, "total_tokens": 6687872} +{"current_steps": 13590, "total_steps": 37885, "loss": 0.002, "lr": 1.6192820644425176e-06, "epoch": 1.7935858519202852, "percentage": 35.87, "elapsed_time": "0:20:06", "remaining_time": "0:35:56", "throughput": 5545.93, "total_tokens": 6690368} +{"current_steps": 13595, "total_steps": 37885, "loss": 0.0692, "lr": 1.6189202730361563e-06, "epoch": 1.7942457436980335, "percentage": 35.88, "elapsed_time": "0:20:06", "remaining_time": "0:35:55", "throughput": 5546.56, "total_tokens": 6692992} +{"current_steps": 13600, "total_steps": 37885, "loss": 0.0568, "lr": 1.618558350268449e-06, "epoch": 1.794905635475782, "percentage": 35.9, "elapsed_time": "0:20:07", "remaining_time": "0:35:55", "throughput": 5546.81, "total_tokens": 6695168} +{"current_steps": 13605, "total_steps": 37885, "loss": 0.1914, "lr": 1.618196296216211e-06, "epoch": 1.7955655272535305, "percentage": 35.91, "elapsed_time": "0:20:07", "remaining_time": "0:35:54", "throughput": 5547.23, "total_tokens": 6697536} +{"current_steps": 13610, "total_steps": 37885, "loss": 0.073, "lr": 1.6178341109562859e-06, "epoch": 1.7962254190312787, "percentage": 35.92, "elapsed_time": "0:20:07", "remaining_time": "0:35:54", "throughput": 5547.96, "total_tokens": 6700288} +{"current_steps": 13615, "total_steps": 37885, "loss": 0.0581, "lr": 1.6174717945655446e-06, "epoch": 1.7968853108090275, "percentage": 35.94, "elapsed_time": "0:20:08", "remaining_time": "0:35:53", "throughput": 5548.57, "total_tokens": 6702912} +{"current_steps": 13620, "total_steps": 37885, "loss": 0.083, "lr": 1.6171093471208863e-06, "epoch": 1.7975452025867757, "percentage": 35.95, "elapsed_time": "0:20:08", "remaining_time": "0:35:52", "throughput": 5549.11, "total_tokens": 6705408} +{"current_steps": 13625, "total_steps": 37885, "loss": 0.0008, "lr": 1.616746768699238e-06, "epoch": 1.7982050943645242, "percentage": 35.96, "elapsed_time": "0:20:08", "remaining_time": "0:35:52", "throughput": 5549.63, "total_tokens": 6707904} +{"current_steps": 13630, "total_steps": 37885, "loss": 0.0738, "lr": 1.616384059377554e-06, "epoch": 1.7988649861422727, "percentage": 35.98, "elapsed_time": "0:20:09", "remaining_time": "0:35:51", "throughput": 5550.14, "total_tokens": 6710400} +{"current_steps": 13635, "total_steps": 37885, "loss": 0.0009, "lr": 1.616021219232817e-06, "epoch": 1.799524877920021, "percentage": 35.99, "elapsed_time": "0:20:09", "remaining_time": "0:35:50", "throughput": 5550.68, "total_tokens": 6712896} +{"current_steps": 13640, "total_steps": 37885, "loss": 0.0371, "lr": 1.6156582483420374e-06, "epoch": 1.8001847696977695, "percentage": 36.0, "elapsed_time": "0:20:09", "remaining_time": "0:35:50", "throughput": 5551.28, "total_tokens": 6715520} +{"current_steps": 13645, "total_steps": 37885, "loss": 0.1065, "lr": 1.6152951467822523e-06, "epoch": 1.800844661475518, "percentage": 36.02, "elapsed_time": "0:20:10", "remaining_time": "0:35:49", "throughput": 5551.85, "total_tokens": 6718080} +{"current_steps": 13650, "total_steps": 37885, "loss": 0.1254, "lr": 1.614931914630528e-06, "epoch": 1.8015045532532663, "percentage": 36.03, "elapsed_time": "0:20:10", "remaining_time": "0:35:49", "throughput": 5552.16, "total_tokens": 6720320} +{"current_steps": 13655, "total_steps": 37885, "loss": 0.0896, "lr": 1.6145685519639577e-06, "epoch": 1.802164445031015, "percentage": 36.04, "elapsed_time": "0:20:10", "remaining_time": "0:35:48", "throughput": 5552.68, "total_tokens": 6722816} +{"current_steps": 13660, "total_steps": 37885, "loss": 0.0551, "lr": 1.6142050588596631e-06, "epoch": 1.8028243368087633, "percentage": 36.06, "elapsed_time": "0:20:11", "remaining_time": "0:35:47", "throughput": 5553.05, "total_tokens": 6725120} +{"current_steps": 13665, "total_steps": 37885, "loss": 0.0597, "lr": 1.6138414353947923e-06, "epoch": 1.8034842285865118, "percentage": 36.07, "elapsed_time": "0:20:11", "remaining_time": "0:35:47", "throughput": 5553.55, "total_tokens": 6727616} +{"current_steps": 13670, "total_steps": 37885, "loss": 0.0805, "lr": 1.613477681646522e-06, "epoch": 1.8041441203642603, "percentage": 36.08, "elapsed_time": "0:20:11", "remaining_time": "0:35:46", "throughput": 5554.2, "total_tokens": 6730240} +{"current_steps": 13675, "total_steps": 37885, "loss": 0.0583, "lr": 1.6131137976920556e-06, "epoch": 1.8048040121420086, "percentage": 36.1, "elapsed_time": "0:20:12", "remaining_time": "0:35:45", "throughput": 5554.64, "total_tokens": 6732608} +{"current_steps": 13680, "total_steps": 37885, "loss": 0.1257, "lr": 1.612749783608626e-06, "epoch": 1.8054639039197573, "percentage": 36.11, "elapsed_time": "0:20:12", "remaining_time": "0:35:45", "throughput": 5555.22, "total_tokens": 6735168} +{"current_steps": 13685, "total_steps": 37885, "loss": 0.1853, "lr": 1.612385639473492e-06, "epoch": 1.8061237956975056, "percentage": 36.12, "elapsed_time": "0:20:12", "remaining_time": "0:35:44", "throughput": 5555.76, "total_tokens": 6737664} +{"current_steps": 13690, "total_steps": 37885, "loss": 0.0448, "lr": 1.6120213653639407e-06, "epoch": 1.806783687475254, "percentage": 36.14, "elapsed_time": "0:20:13", "remaining_time": "0:35:43", "throughput": 5556.33, "total_tokens": 6740224} +{"current_steps": 13695, "total_steps": 37885, "loss": 0.0295, "lr": 1.6116569613572861e-06, "epoch": 1.8074435792530026, "percentage": 36.15, "elapsed_time": "0:20:13", "remaining_time": "0:35:43", "throughput": 5556.96, "total_tokens": 6742848} +{"current_steps": 13700, "total_steps": 37885, "loss": 0.1216, "lr": 1.611292427530871e-06, "epoch": 1.8081034710307509, "percentage": 36.16, "elapsed_time": "0:20:13", "remaining_time": "0:35:42", "throughput": 5557.59, "total_tokens": 6745472} +{"current_steps": 13705, "total_steps": 37885, "loss": 0.0853, "lr": 1.6109277639620648e-06, "epoch": 1.8087633628084994, "percentage": 36.18, "elapsed_time": "0:20:14", "remaining_time": "0:35:42", "throughput": 5558.07, "total_tokens": 6747904} +{"current_steps": 13710, "total_steps": 37885, "loss": 0.1297, "lr": 1.6105629707282645e-06, "epoch": 1.8094232545862479, "percentage": 36.19, "elapsed_time": "0:20:14", "remaining_time": "0:35:41", "throughput": 5558.77, "total_tokens": 6750592} +{"current_steps": 13715, "total_steps": 37885, "loss": 0.0997, "lr": 1.6101980479068954e-06, "epoch": 1.8100831463639961, "percentage": 36.2, "elapsed_time": "0:20:14", "remaining_time": "0:35:40", "throughput": 5559.24, "total_tokens": 6753024} +{"current_steps": 13720, "total_steps": 37885, "loss": 0.0846, "lr": 1.609832995575409e-06, "epoch": 1.8107430381417449, "percentage": 36.21, "elapsed_time": "0:20:15", "remaining_time": "0:35:40", "throughput": 5559.67, "total_tokens": 6755392} +{"current_steps": 13725, "total_steps": 37885, "loss": 0.0217, "lr": 1.6094678138112854e-06, "epoch": 1.8114029299194931, "percentage": 36.23, "elapsed_time": "0:20:15", "remaining_time": "0:35:39", "throughput": 5560.05, "total_tokens": 6757696} +{"current_steps": 13730, "total_steps": 37885, "loss": 0.0975, "lr": 1.6091025026920316e-06, "epoch": 1.8120628216972416, "percentage": 36.24, "elapsed_time": "0:20:15", "remaining_time": "0:35:38", "throughput": 5560.52, "total_tokens": 6760128} +{"current_steps": 13735, "total_steps": 37885, "loss": 0.122, "lr": 1.6087370622951824e-06, "epoch": 1.8127227134749901, "percentage": 36.25, "elapsed_time": "0:20:16", "remaining_time": "0:35:38", "throughput": 5560.95, "total_tokens": 6762496} +{"current_steps": 13740, "total_steps": 37885, "loss": 0.0623, "lr": 1.6083714926983004e-06, "epoch": 1.8133826052527384, "percentage": 36.27, "elapsed_time": "0:20:16", "remaining_time": "0:35:37", "throughput": 5561.56, "total_tokens": 6765120} +{"current_steps": 13745, "total_steps": 37885, "loss": 0.0799, "lr": 1.608005793978974e-06, "epoch": 1.8140424970304871, "percentage": 36.28, "elapsed_time": "0:20:16", "remaining_time": "0:35:36", "throughput": 5562.23, "total_tokens": 6767808} +{"current_steps": 13750, "total_steps": 37885, "loss": 0.1322, "lr": 1.6076399662148207e-06, "epoch": 1.8147023888082354, "percentage": 36.29, "elapsed_time": "0:20:17", "remaining_time": "0:35:36", "throughput": 5562.7, "total_tokens": 6770240} +{"current_steps": 13755, "total_steps": 37885, "loss": 0.0825, "lr": 1.6072740094834848e-06, "epoch": 1.815362280585984, "percentage": 36.31, "elapsed_time": "0:20:17", "remaining_time": "0:35:35", "throughput": 5563.1, "total_tokens": 6772608} +{"current_steps": 13760, "total_steps": 37885, "loss": 0.1762, "lr": 1.606907923862638e-06, "epoch": 1.8160221723637324, "percentage": 36.32, "elapsed_time": "0:20:17", "remaining_time": "0:35:35", "throughput": 5563.51, "total_tokens": 6774976} +{"current_steps": 13765, "total_steps": 37885, "loss": 0.0425, "lr": 1.6065417094299793e-06, "epoch": 1.8166820641414807, "percentage": 36.33, "elapsed_time": "0:20:18", "remaining_time": "0:35:34", "throughput": 5563.99, "total_tokens": 6777408} +{"current_steps": 13770, "total_steps": 37885, "loss": 0.0008, "lr": 1.6061753662632352e-06, "epoch": 1.8173419559192292, "percentage": 36.35, "elapsed_time": "0:20:18", "remaining_time": "0:35:33", "throughput": 5564.25, "total_tokens": 6779584} +{"current_steps": 13775, "total_steps": 37885, "loss": 0.0566, "lr": 1.6058088944401586e-06, "epoch": 1.8180018476969777, "percentage": 36.36, "elapsed_time": "0:20:18", "remaining_time": "0:35:33", "throughput": 5564.7, "total_tokens": 6782016} +{"current_steps": 13780, "total_steps": 37885, "loss": 0.07, "lr": 1.6054422940385315e-06, "epoch": 1.818661739474726, "percentage": 36.37, "elapsed_time": "0:20:19", "remaining_time": "0:35:32", "throughput": 5565.27, "total_tokens": 6784576} +{"current_steps": 13785, "total_steps": 37885, "loss": 0.0423, "lr": 1.6050755651361617e-06, "epoch": 1.8193216312524747, "percentage": 36.39, "elapsed_time": "0:20:19", "remaining_time": "0:35:31", "throughput": 5565.77, "total_tokens": 6787072} +{"current_steps": 13790, "total_steps": 37885, "loss": 0.1433, "lr": 1.6047087078108848e-06, "epoch": 1.819981523030223, "percentage": 36.4, "elapsed_time": "0:20:19", "remaining_time": "0:35:31", "throughput": 5566.15, "total_tokens": 6789376} +{"current_steps": 13795, "total_steps": 37885, "loss": 0.0027, "lr": 1.6043417221405636e-06, "epoch": 1.8206414148079715, "percentage": 36.41, "elapsed_time": "0:20:20", "remaining_time": "0:35:30", "throughput": 5566.77, "total_tokens": 6792000} +{"current_steps": 13800, "total_steps": 37885, "loss": 0.0624, "lr": 1.6039746082030878e-06, "epoch": 1.82130130658572, "percentage": 36.43, "elapsed_time": "0:20:20", "remaining_time": "0:35:30", "throughput": 5567.18, "total_tokens": 6794368} +{"current_steps": 13805, "total_steps": 37885, "loss": 0.0008, "lr": 1.6036073660763755e-06, "epoch": 1.8219611983634683, "percentage": 36.44, "elapsed_time": "0:20:20", "remaining_time": "0:35:29", "throughput": 5567.66, "total_tokens": 6796800} +{"current_steps": 13810, "total_steps": 37885, "loss": 0.0646, "lr": 1.6032399958383706e-06, "epoch": 1.822621090141217, "percentage": 36.45, "elapsed_time": "0:20:21", "remaining_time": "0:35:28", "throughput": 5568.14, "total_tokens": 6799232} +{"current_steps": 13815, "total_steps": 37885, "loss": 0.1131, "lr": 1.6028724975670454e-06, "epoch": 1.8232809819189653, "percentage": 36.47, "elapsed_time": "0:20:21", "remaining_time": "0:35:28", "throughput": 5568.71, "total_tokens": 6801792} +{"current_steps": 13820, "total_steps": 37885, "loss": 0.1451, "lr": 1.6025048713403977e-06, "epoch": 1.8239408736967138, "percentage": 36.48, "elapsed_time": "0:20:21", "remaining_time": "0:35:27", "throughput": 5569.32, "total_tokens": 6804416} +{"current_steps": 13825, "total_steps": 37885, "loss": 0.0421, "lr": 1.6021371172364543e-06, "epoch": 1.8246007654744623, "percentage": 36.49, "elapsed_time": "0:20:22", "remaining_time": "0:35:26", "throughput": 5569.81, "total_tokens": 6806912} +{"current_steps": 13830, "total_steps": 37885, "loss": 0.0012, "lr": 1.6017692353332676e-06, "epoch": 1.8252606572522105, "percentage": 36.51, "elapsed_time": "0:20:22", "remaining_time": "0:35:26", "throughput": 5570.25, "total_tokens": 6809280} +{"current_steps": 13835, "total_steps": 37885, "loss": 0.0015, "lr": 1.6014012257089186e-06, "epoch": 1.825920549029959, "percentage": 36.52, "elapsed_time": "0:20:22", "remaining_time": "0:35:25", "throughput": 5570.8, "total_tokens": 6811776} +{"current_steps": 13840, "total_steps": 37885, "loss": 0.0004, "lr": 1.6010330884415146e-06, "epoch": 1.8265804408077075, "percentage": 36.53, "elapsed_time": "0:20:23", "remaining_time": "0:35:24", "throughput": 5571.17, "total_tokens": 6814080} +{"current_steps": 13845, "total_steps": 37885, "loss": 0.088, "lr": 1.6006648236091903e-06, "epoch": 1.827240332585456, "percentage": 36.54, "elapsed_time": "0:20:23", "remaining_time": "0:35:24", "throughput": 5571.63, "total_tokens": 6816512} +{"current_steps": 13850, "total_steps": 37885, "loss": 0.0002, "lr": 1.600296431290106e-06, "epoch": 1.8279002243632045, "percentage": 36.56, "elapsed_time": "0:20:23", "remaining_time": "0:35:23", "throughput": 5571.94, "total_tokens": 6818752} +{"current_steps": 13855, "total_steps": 37885, "loss": 0.1294, "lr": 1.5999279115624517e-06, "epoch": 1.8285601161409528, "percentage": 36.57, "elapsed_time": "0:20:24", "remaining_time": "0:35:23", "throughput": 5572.4, "total_tokens": 6821248} +{"current_steps": 13860, "total_steps": 37885, "loss": 0.1328, "lr": 1.5995592645044424e-06, "epoch": 1.8292200079187013, "percentage": 36.58, "elapsed_time": "0:20:24", "remaining_time": "0:35:22", "throughput": 5572.96, "total_tokens": 6823808} +{"current_steps": 13865, "total_steps": 37885, "loss": 0.1448, "lr": 1.599190490194321e-06, "epoch": 1.8298798996964498, "percentage": 36.6, "elapsed_time": "0:20:24", "remaining_time": "0:35:21", "throughput": 5573.57, "total_tokens": 6826432} +{"current_steps": 13870, "total_steps": 37885, "loss": 0.0097, "lr": 1.5988215887103568e-06, "epoch": 1.830539791474198, "percentage": 36.61, "elapsed_time": "0:20:25", "remaining_time": "0:35:21", "throughput": 5574.0, "total_tokens": 6828800} +{"current_steps": 13875, "total_steps": 37885, "loss": 0.1828, "lr": 1.598452560130847e-06, "epoch": 1.8311996832519468, "percentage": 36.62, "elapsed_time": "0:20:25", "remaining_time": "0:35:20", "throughput": 5574.58, "total_tokens": 6831360} +{"current_steps": 13880, "total_steps": 37885, "loss": 0.1433, "lr": 1.598083404534115e-06, "epoch": 1.831859575029695, "percentage": 36.64, "elapsed_time": "0:20:25", "remaining_time": "0:35:19", "throughput": 5574.97, "total_tokens": 6833664} +{"current_steps": 13885, "total_steps": 37885, "loss": 0.065, "lr": 1.597714121998511e-06, "epoch": 1.8325194668074436, "percentage": 36.65, "elapsed_time": "0:20:26", "remaining_time": "0:35:19", "throughput": 5575.31, "total_tokens": 6835904} +{"current_steps": 13890, "total_steps": 37885, "loss": 0.1494, "lr": 1.5973447126024131e-06, "epoch": 1.833179358585192, "percentage": 36.66, "elapsed_time": "0:20:26", "remaining_time": "0:35:18", "throughput": 5575.8, "total_tokens": 6838336} +{"current_steps": 13895, "total_steps": 37885, "loss": 0.2852, "lr": 1.596975176424226e-06, "epoch": 1.8338392503629404, "percentage": 36.68, "elapsed_time": "0:20:26", "remaining_time": "0:35:18", "throughput": 5576.47, "total_tokens": 6841024} +{"current_steps": 13900, "total_steps": 37885, "loss": 0.1151, "lr": 1.5966055135423798e-06, "epoch": 1.8344991421406889, "percentage": 36.69, "elapsed_time": "0:20:27", "remaining_time": "0:35:17", "throughput": 5576.76, "total_tokens": 6843200} +{"current_steps": 13905, "total_steps": 37885, "loss": 0.004, "lr": 1.5962357240353342e-06, "epoch": 1.8351590339184374, "percentage": 36.7, "elapsed_time": "0:20:27", "remaining_time": "0:35:16", "throughput": 5577.21, "total_tokens": 6845568} +{"current_steps": 13910, "total_steps": 37885, "loss": 0.0585, "lr": 1.5958658079815737e-06, "epoch": 1.8358189256961859, "percentage": 36.72, "elapsed_time": "0:20:27", "remaining_time": "0:35:16", "throughput": 5577.7, "total_tokens": 6848000} +{"current_steps": 13915, "total_steps": 37885, "loss": 0.0423, "lr": 1.5954957654596102e-06, "epoch": 1.8364788174739344, "percentage": 36.73, "elapsed_time": "0:20:28", "remaining_time": "0:35:15", "throughput": 5578.39, "total_tokens": 6850688} +{"current_steps": 13920, "total_steps": 37885, "loss": 0.0121, "lr": 1.595125596547983e-06, "epoch": 1.8371387092516827, "percentage": 36.74, "elapsed_time": "0:20:28", "remaining_time": "0:35:14", "throughput": 5578.74, "total_tokens": 6852928} +{"current_steps": 13925, "total_steps": 37885, "loss": 0.0006, "lr": 1.5947553013252572e-06, "epoch": 1.8377986010294312, "percentage": 36.76, "elapsed_time": "0:20:28", "remaining_time": "0:35:14", "throughput": 5579.28, "total_tokens": 6855424} +{"current_steps": 13930, "total_steps": 37885, "loss": 0.0626, "lr": 1.594384879870026e-06, "epoch": 1.8384584928071797, "percentage": 36.77, "elapsed_time": "0:20:29", "remaining_time": "0:35:13", "throughput": 5579.92, "total_tokens": 6858048} +{"current_steps": 13935, "total_steps": 37885, "loss": 0.0881, "lr": 1.594014332260908e-06, "epoch": 1.839118384584928, "percentage": 36.78, "elapsed_time": "0:20:29", "remaining_time": "0:35:12", "throughput": 5580.55, "total_tokens": 6860672} +{"current_steps": 13940, "total_steps": 37885, "loss": 0.0854, "lr": 1.5936436585765493e-06, "epoch": 1.8397782763626767, "percentage": 36.8, "elapsed_time": "0:20:29", "remaining_time": "0:35:12", "throughput": 5581.04, "total_tokens": 6863104} +{"current_steps": 13945, "total_steps": 37885, "loss": 0.1164, "lr": 1.5932728588956233e-06, "epoch": 1.840438168140425, "percentage": 36.81, "elapsed_time": "0:20:30", "remaining_time": "0:35:11", "throughput": 5581.49, "total_tokens": 6865472} +{"current_steps": 13950, "total_steps": 37885, "loss": 0.0017, "lr": 1.5929019332968285e-06, "epoch": 1.8410980599181734, "percentage": 36.82, "elapsed_time": "0:20:30", "remaining_time": "0:35:11", "throughput": 5581.92, "total_tokens": 6867840} +{"current_steps": 13955, "total_steps": 37885, "loss": 0.0018, "lr": 1.5925308818588926e-06, "epoch": 1.841757951695922, "percentage": 36.84, "elapsed_time": "0:20:30", "remaining_time": "0:35:10", "throughput": 5582.35, "total_tokens": 6870208} +{"current_steps": 13960, "total_steps": 37885, "loss": 0.055, "lr": 1.5921597046605672e-06, "epoch": 1.8424178434736702, "percentage": 36.85, "elapsed_time": "0:20:31", "remaining_time": "0:35:09", "throughput": 5582.92, "total_tokens": 6872768} +{"current_steps": 13965, "total_steps": 37885, "loss": 0.046, "lr": 1.5917884017806327e-06, "epoch": 1.8430777352514187, "percentage": 36.86, "elapsed_time": "0:20:31", "remaining_time": "0:35:09", "throughput": 5583.49, "total_tokens": 6875328} +{"current_steps": 13970, "total_steps": 37885, "loss": 0.1396, "lr": 1.5914169732978957e-06, "epoch": 1.8437376270291672, "percentage": 36.87, "elapsed_time": "0:20:31", "remaining_time": "0:35:08", "throughput": 5583.84, "total_tokens": 6877632} +{"current_steps": 13975, "total_steps": 37885, "loss": 0.1319, "lr": 1.5910454192911883e-06, "epoch": 1.8443975188069157, "percentage": 36.89, "elapsed_time": "0:20:32", "remaining_time": "0:35:07", "throughput": 5584.17, "total_tokens": 6879872} +{"current_steps": 13980, "total_steps": 37885, "loss": 0.092, "lr": 1.590673739839371e-06, "epoch": 1.8450574105846642, "percentage": 36.9, "elapsed_time": "0:20:32", "remaining_time": "0:35:07", "throughput": 5584.9, "total_tokens": 6882688} +{"current_steps": 13985, "total_steps": 37885, "loss": 0.0631, "lr": 1.5903019350213293e-06, "epoch": 1.8457173023624125, "percentage": 36.91, "elapsed_time": "0:20:32", "remaining_time": "0:35:06", "throughput": 5585.44, "total_tokens": 6885248} +{"current_steps": 13990, "total_steps": 37885, "loss": 0.0017, "lr": 1.589930004915977e-06, "epoch": 1.846377194140161, "percentage": 36.93, "elapsed_time": "0:20:33", "remaining_time": "0:35:06", "throughput": 5585.82, "total_tokens": 6887552} +{"current_steps": 13995, "total_steps": 37885, "loss": 0.0009, "lr": 1.5895579496022532e-06, "epoch": 1.8470370859179095, "percentage": 36.94, "elapsed_time": "0:20:33", "remaining_time": "0:35:05", "throughput": 5586.31, "total_tokens": 6889984} +{"current_steps": 14000, "total_steps": 37885, "loss": 0.3151, "lr": 1.5891857691591235e-06, "epoch": 1.8476969776956578, "percentage": 36.95, "elapsed_time": "0:20:33", "remaining_time": "0:35:04", "throughput": 5586.84, "total_tokens": 6892480} +{"current_steps": 14005, "total_steps": 37885, "loss": 0.0002, "lr": 1.588813463665581e-06, "epoch": 1.8483568694734065, "percentage": 36.97, "elapsed_time": "0:20:34", "remaining_time": "0:35:04", "throughput": 5587.33, "total_tokens": 6894912} +{"current_steps": 14010, "total_steps": 37885, "loss": 0.1177, "lr": 1.5884410332006443e-06, "epoch": 1.8490167612511548, "percentage": 36.98, "elapsed_time": "0:20:34", "remaining_time": "0:35:03", "throughput": 5587.86, "total_tokens": 6897408} +{"current_steps": 14015, "total_steps": 37885, "loss": 0.0403, "lr": 1.58806847784336e-06, "epoch": 1.8496766530289033, "percentage": 36.99, "elapsed_time": "0:20:34", "remaining_time": "0:35:02", "throughput": 5588.25, "total_tokens": 6899712} +{"current_steps": 14020, "total_steps": 37885, "loss": 0.1096, "lr": 1.5876957976727993e-06, "epoch": 1.8503365448066518, "percentage": 37.01, "elapsed_time": "0:20:35", "remaining_time": "0:35:02", "throughput": 5588.64, "total_tokens": 6902016} +{"current_steps": 14025, "total_steps": 37885, "loss": 0.1389, "lr": 1.5873229927680617e-06, "epoch": 1.8509964365844, "percentage": 37.02, "elapsed_time": "0:20:35", "remaining_time": "0:35:01", "throughput": 5589.07, "total_tokens": 6904384} +{"current_steps": 14030, "total_steps": 37885, "loss": 0.0433, "lr": 1.5869500632082717e-06, "epoch": 1.8516563283621488, "percentage": 37.03, "elapsed_time": "0:20:35", "remaining_time": "0:35:00", "throughput": 5589.56, "total_tokens": 6906816} +{"current_steps": 14035, "total_steps": 37885, "loss": 0.0012, "lr": 1.586577009072581e-06, "epoch": 1.852316220139897, "percentage": 37.05, "elapsed_time": "0:20:35", "remaining_time": "0:35:00", "throughput": 5590.13, "total_tokens": 6909376} +{"current_steps": 14040, "total_steps": 37885, "loss": 0.0512, "lr": 1.5862038304401682e-06, "epoch": 1.8529761119176456, "percentage": 37.06, "elapsed_time": "0:20:36", "remaining_time": "0:34:59", "throughput": 5590.52, "total_tokens": 6911680} +{"current_steps": 14045, "total_steps": 37885, "loss": 0.0724, "lr": 1.585830527390237e-06, "epoch": 1.853636003695394, "percentage": 37.07, "elapsed_time": "0:20:36", "remaining_time": "0:34:59", "throughput": 5590.86, "total_tokens": 6913920} +{"current_steps": 14050, "total_steps": 37885, "loss": 0.189, "lr": 1.585457100002019e-06, "epoch": 1.8542958954731423, "percentage": 37.09, "elapsed_time": "0:20:36", "remaining_time": "0:34:58", "throughput": 5591.38, "total_tokens": 6916416} +{"current_steps": 14055, "total_steps": 37885, "loss": 0.0629, "lr": 1.5850835483547705e-06, "epoch": 1.8549557872508908, "percentage": 37.1, "elapsed_time": "0:20:37", "remaining_time": "0:34:57", "throughput": 5591.86, "total_tokens": 6918848} +{"current_steps": 14060, "total_steps": 37885, "loss": 0.0666, "lr": 1.5847098725277763e-06, "epoch": 1.8556156790286393, "percentage": 37.11, "elapsed_time": "0:20:37", "remaining_time": "0:34:57", "throughput": 5592.52, "total_tokens": 6921536} +{"current_steps": 14065, "total_steps": 37885, "loss": 0.0795, "lr": 1.5843360726003454e-06, "epoch": 1.8562755708063876, "percentage": 37.13, "elapsed_time": "0:20:37", "remaining_time": "0:34:56", "throughput": 5593.09, "total_tokens": 6924096} +{"current_steps": 14070, "total_steps": 37885, "loss": 0.0008, "lr": 1.5839621486518144e-06, "epoch": 1.8569354625841363, "percentage": 37.14, "elapsed_time": "0:20:38", "remaining_time": "0:34:55", "throughput": 5593.47, "total_tokens": 6926400} +{"current_steps": 14075, "total_steps": 37885, "loss": 0.2018, "lr": 1.5835881007615466e-06, "epoch": 1.8575953543618846, "percentage": 37.15, "elapsed_time": "0:20:38", "remaining_time": "0:34:55", "throughput": 5594.0, "total_tokens": 6928896} +{"current_steps": 14080, "total_steps": 37885, "loss": 0.184, "lr": 1.5832139290089302e-06, "epoch": 1.858255246139633, "percentage": 37.17, "elapsed_time": "0:20:38", "remaining_time": "0:34:54", "throughput": 5594.53, "total_tokens": 6931392} +{"current_steps": 14085, "total_steps": 37885, "loss": 0.0733, "lr": 1.5828396334733807e-06, "epoch": 1.8589151379173816, "percentage": 37.18, "elapsed_time": "0:20:39", "remaining_time": "0:34:54", "throughput": 5595.2, "total_tokens": 6934080} +{"current_steps": 14090, "total_steps": 37885, "loss": 0.1321, "lr": 1.5824652142343394e-06, "epoch": 1.8595750296951299, "percentage": 37.19, "elapsed_time": "0:20:39", "remaining_time": "0:34:53", "throughput": 5595.74, "total_tokens": 6936576} +{"current_steps": 14095, "total_steps": 37885, "loss": 0.1625, "lr": 1.582090671371274e-06, "epoch": 1.8602349214728786, "percentage": 37.2, "elapsed_time": "0:20:39", "remaining_time": "0:34:52", "throughput": 5596.11, "total_tokens": 6938880} +{"current_steps": 14100, "total_steps": 37885, "loss": 0.1393, "lr": 1.5817160049636792e-06, "epoch": 1.860894813250627, "percentage": 37.22, "elapsed_time": "0:20:40", "remaining_time": "0:34:52", "throughput": 5596.74, "total_tokens": 6941504} +{"current_steps": 14105, "total_steps": 37885, "loss": 0.0703, "lr": 1.5813412150910748e-06, "epoch": 1.8615547050283754, "percentage": 37.23, "elapsed_time": "0:20:40", "remaining_time": "0:34:51", "throughput": 5597.27, "total_tokens": 6944000} +{"current_steps": 14110, "total_steps": 37885, "loss": 0.1415, "lr": 1.580966301833007e-06, "epoch": 1.862214596806124, "percentage": 37.24, "elapsed_time": "0:20:40", "remaining_time": "0:34:50", "throughput": 5597.8, "total_tokens": 6946496} +{"current_steps": 14115, "total_steps": 37885, "loss": 0.0015, "lr": 1.580591265269049e-06, "epoch": 1.8628744885838722, "percentage": 37.26, "elapsed_time": "0:20:41", "remaining_time": "0:34:50", "throughput": 5598.22, "total_tokens": 6948864} +{"current_steps": 14120, "total_steps": 37885, "loss": 0.0012, "lr": 1.580216105478799e-06, "epoch": 1.8635343803616207, "percentage": 37.27, "elapsed_time": "0:20:41", "remaining_time": "0:34:49", "throughput": 5598.75, "total_tokens": 6951360} +{"current_steps": 14125, "total_steps": 37885, "loss": 0.0014, "lr": 1.5798408225418825e-06, "epoch": 1.8641942721393692, "percentage": 37.28, "elapsed_time": "0:20:41", "remaining_time": "0:34:49", "throughput": 5599.28, "total_tokens": 6953856} +{"current_steps": 14130, "total_steps": 37885, "loss": 0.0719, "lr": 1.57946541653795e-06, "epoch": 1.8648541639171174, "percentage": 37.3, "elapsed_time": "0:20:42", "remaining_time": "0:34:48", "throughput": 5599.61, "total_tokens": 6956096} +{"current_steps": 14135, "total_steps": 37885, "loss": 0.0571, "lr": 1.579089887546679e-06, "epoch": 1.8655140556948662, "percentage": 37.31, "elapsed_time": "0:20:42", "remaining_time": "0:34:47", "throughput": 5600.08, "total_tokens": 6958528} +{"current_steps": 14140, "total_steps": 37885, "loss": 0.1446, "lr": 1.578714235647773e-06, "epoch": 1.8661739474726144, "percentage": 37.32, "elapsed_time": "0:20:42", "remaining_time": "0:34:47", "throughput": 5600.37, "total_tokens": 6960704} +{"current_steps": 14145, "total_steps": 37885, "loss": 0.1826, "lr": 1.5783384609209609e-06, "epoch": 1.866833839250363, "percentage": 37.34, "elapsed_time": "0:20:43", "remaining_time": "0:34:46", "throughput": 5600.8, "total_tokens": 6963072} +{"current_steps": 14150, "total_steps": 37885, "loss": 0.0619, "lr": 1.577962563445999e-06, "epoch": 1.8674937310281114, "percentage": 37.35, "elapsed_time": "0:20:43", "remaining_time": "0:34:45", "throughput": 5601.33, "total_tokens": 6965568} +{"current_steps": 14155, "total_steps": 37885, "loss": 0.0009, "lr": 1.5775865433026679e-06, "epoch": 1.8681536228058597, "percentage": 37.36, "elapsed_time": "0:20:43", "remaining_time": "0:34:45", "throughput": 5601.75, "total_tokens": 6967936} +{"current_steps": 14160, "total_steps": 37885, "loss": 0.0008, "lr": 1.5772104005707756e-06, "epoch": 1.8688135145836084, "percentage": 37.38, "elapsed_time": "0:20:44", "remaining_time": "0:34:44", "throughput": 5602.28, "total_tokens": 6970432} +{"current_steps": 14165, "total_steps": 37885, "loss": 0.1136, "lr": 1.5768341353301554e-06, "epoch": 1.8694734063613567, "percentage": 37.39, "elapsed_time": "0:20:44", "remaining_time": "0:34:44", "throughput": 5602.94, "total_tokens": 6973120} +{"current_steps": 14170, "total_steps": 37885, "loss": 0.0006, "lr": 1.5764577476606673e-06, "epoch": 1.8701332981391052, "percentage": 37.4, "elapsed_time": "0:20:44", "remaining_time": "0:34:43", "throughput": 5603.22, "total_tokens": 6975296} +{"current_steps": 14175, "total_steps": 37885, "loss": 0.0891, "lr": 1.5760812376421965e-06, "epoch": 1.8707931899168537, "percentage": 37.42, "elapsed_time": "0:20:45", "remaining_time": "0:34:42", "throughput": 5603.7, "total_tokens": 6977728} +{"current_steps": 14180, "total_steps": 37885, "loss": 0.1602, "lr": 1.5757046053546547e-06, "epoch": 1.871453081694602, "percentage": 37.43, "elapsed_time": "0:20:45", "remaining_time": "0:34:42", "throughput": 5604.18, "total_tokens": 6980160} +{"current_steps": 14185, "total_steps": 37885, "loss": 0.081, "lr": 1.5753278508779797e-06, "epoch": 1.8721129734723505, "percentage": 37.44, "elapsed_time": "0:20:45", "remaining_time": "0:34:41", "throughput": 5604.75, "total_tokens": 6982720} +{"current_steps": 14190, "total_steps": 37885, "loss": 0.2397, "lr": 1.574950974292134e-06, "epoch": 1.872772865250099, "percentage": 37.46, "elapsed_time": "0:20:46", "remaining_time": "0:34:40", "throughput": 5605.35, "total_tokens": 6985344} +{"current_steps": 14195, "total_steps": 37885, "loss": 0.0029, "lr": 1.5745739756771078e-06, "epoch": 1.8734327570278473, "percentage": 37.47, "elapsed_time": "0:20:46", "remaining_time": "0:34:40", "throughput": 5605.81, "total_tokens": 6987776} +{"current_steps": 14200, "total_steps": 37885, "loss": 0.0016, "lr": 1.574196855112916e-06, "epoch": 1.874092648805596, "percentage": 37.48, "elapsed_time": "0:20:46", "remaining_time": "0:34:39", "throughput": 5606.23, "total_tokens": 6990144} +{"current_steps": 14205, "total_steps": 37885, "loss": 0.0731, "lr": 1.5738196126795998e-06, "epoch": 1.8747525405833443, "percentage": 37.5, "elapsed_time": "0:20:47", "remaining_time": "0:34:39", "throughput": 5606.9, "total_tokens": 6992832} +{"current_steps": 14210, "total_steps": 37885, "loss": 0.0601, "lr": 1.5734422484572258e-06, "epoch": 1.8754124323610928, "percentage": 37.51, "elapsed_time": "0:20:47", "remaining_time": "0:34:38", "throughput": 5607.36, "total_tokens": 6995264} +{"current_steps": 14215, "total_steps": 37885, "loss": 0.0583, "lr": 1.573064762525887e-06, "epoch": 1.8760723241388413, "percentage": 37.52, "elapsed_time": "0:20:47", "remaining_time": "0:34:37", "throughput": 5607.68, "total_tokens": 6997504} +{"current_steps": 14220, "total_steps": 37885, "loss": 0.0743, "lr": 1.5726871549657027e-06, "epoch": 1.8767322159165896, "percentage": 37.53, "elapsed_time": "0:20:48", "remaining_time": "0:34:37", "throughput": 5608.11, "total_tokens": 6999872} +{"current_steps": 14225, "total_steps": 37885, "loss": 0.0004, "lr": 1.5723094258568161e-06, "epoch": 1.8773921076943383, "percentage": 37.55, "elapsed_time": "0:20:48", "remaining_time": "0:34:36", "throughput": 5608.68, "total_tokens": 7002432} +{"current_steps": 14230, "total_steps": 37885, "loss": 0.087, "lr": 1.571931575279399e-06, "epoch": 1.8780519994720866, "percentage": 37.56, "elapsed_time": "0:20:48", "remaining_time": "0:34:35", "throughput": 5608.9, "total_tokens": 7004544} +{"current_steps": 14235, "total_steps": 37885, "loss": 0.0305, "lr": 1.5715536033136462e-06, "epoch": 1.878711891249835, "percentage": 37.57, "elapsed_time": "0:20:49", "remaining_time": "0:34:35", "throughput": 5609.32, "total_tokens": 7006912} +{"current_steps": 14240, "total_steps": 37885, "loss": 0.1398, "lr": 1.5711755100397798e-06, "epoch": 1.8793717830275836, "percentage": 37.59, "elapsed_time": "0:20:49", "remaining_time": "0:34:34", "throughput": 5609.85, "total_tokens": 7009408} +{"current_steps": 14245, "total_steps": 37885, "loss": 0.088, "lr": 1.570797295538048e-06, "epoch": 1.8800316748053318, "percentage": 37.6, "elapsed_time": "0:20:49", "remaining_time": "0:34:34", "throughput": 5610.38, "total_tokens": 7011904} +{"current_steps": 14250, "total_steps": 37885, "loss": 0.0514, "lr": 1.5704189598887232e-06, "epoch": 1.8806915665830803, "percentage": 37.61, "elapsed_time": "0:20:50", "remaining_time": "0:34:33", "throughput": 5610.66, "total_tokens": 7014080} +{"current_steps": 14255, "total_steps": 37885, "loss": 0.0358, "lr": 1.570040503172105e-06, "epoch": 1.8813514583608288, "percentage": 37.63, "elapsed_time": "0:20:50", "remaining_time": "0:34:32", "throughput": 5611.03, "total_tokens": 7016384} +{"current_steps": 14260, "total_steps": 37885, "loss": 0.246, "lr": 1.569661925468518e-06, "epoch": 1.8820113501385771, "percentage": 37.64, "elapsed_time": "0:20:50", "remaining_time": "0:34:32", "throughput": 5611.32, "total_tokens": 7018560} +{"current_steps": 14265, "total_steps": 37885, "loss": 0.0017, "lr": 1.5692832268583126e-06, "epoch": 1.8826712419163258, "percentage": 37.65, "elapsed_time": "0:20:51", "remaining_time": "0:34:31", "throughput": 5611.74, "total_tokens": 7020928} +{"current_steps": 14270, "total_steps": 37885, "loss": 0.2468, "lr": 1.5689044074218643e-06, "epoch": 1.8833311336940741, "percentage": 37.67, "elapsed_time": "0:20:51", "remaining_time": "0:34:30", "throughput": 5612.49, "total_tokens": 7023744} +{"current_steps": 14275, "total_steps": 37885, "loss": 0.0838, "lr": 1.5685254672395753e-06, "epoch": 1.8839910254718226, "percentage": 37.68, "elapsed_time": "0:20:51", "remaining_time": "0:34:30", "throughput": 5613.0, "total_tokens": 7026240} +{"current_steps": 14280, "total_steps": 37885, "loss": 0.0016, "lr": 1.568146406391873e-06, "epoch": 1.8846509172495711, "percentage": 37.69, "elapsed_time": "0:20:52", "remaining_time": "0:34:29", "throughput": 5613.38, "total_tokens": 7028544} +{"current_steps": 14285, "total_steps": 37885, "loss": 0.0511, "lr": 1.5677672249592101e-06, "epoch": 1.8853108090273194, "percentage": 37.71, "elapsed_time": "0:20:52", "remaining_time": "0:34:29", "throughput": 5613.9, "total_tokens": 7031040} +{"current_steps": 14290, "total_steps": 37885, "loss": 0.1894, "lr": 1.567387923022065e-06, "epoch": 1.8859707008050681, "percentage": 37.72, "elapsed_time": "0:20:52", "remaining_time": "0:34:28", "throughput": 5614.16, "total_tokens": 7033216} +{"current_steps": 14295, "total_steps": 37885, "loss": 0.0037, "lr": 1.567008500660942e-06, "epoch": 1.8866305925828164, "percentage": 37.73, "elapsed_time": "0:20:53", "remaining_time": "0:34:27", "throughput": 5614.65, "total_tokens": 7035712} +{"current_steps": 14300, "total_steps": 37885, "loss": 0.0039, "lr": 1.5666289579563708e-06, "epoch": 1.887290484360565, "percentage": 37.75, "elapsed_time": "0:20:53", "remaining_time": "0:34:27", "throughput": 5615.34, "total_tokens": 7038464} +{"current_steps": 14305, "total_steps": 37885, "loss": 0.142, "lr": 1.5662492949889065e-06, "epoch": 1.8879503761383134, "percentage": 37.76, "elapsed_time": "0:20:53", "remaining_time": "0:34:26", "throughput": 5615.74, "total_tokens": 7040832} +{"current_steps": 14310, "total_steps": 37885, "loss": 0.002, "lr": 1.5658695118391299e-06, "epoch": 1.8886102679160617, "percentage": 37.77, "elapsed_time": "0:20:54", "remaining_time": "0:34:26", "throughput": 5616.33, "total_tokens": 7043456} +{"current_steps": 14315, "total_steps": 37885, "loss": 0.0934, "lr": 1.5654896085876468e-06, "epoch": 1.8892701596938102, "percentage": 37.79, "elapsed_time": "0:20:54", "remaining_time": "0:34:25", "throughput": 5616.88, "total_tokens": 7046016} +{"current_steps": 14320, "total_steps": 37885, "loss": 0.0622, "lr": 1.5651095853150893e-06, "epoch": 1.8899300514715587, "percentage": 37.8, "elapsed_time": "0:20:54", "remaining_time": "0:34:24", "throughput": 5617.54, "total_tokens": 7048704} +{"current_steps": 14325, "total_steps": 37885, "loss": 0.1235, "lr": 1.5647294421021144e-06, "epoch": 1.890589943249307, "percentage": 37.81, "elapsed_time": "0:20:55", "remaining_time": "0:34:24", "throughput": 5617.86, "total_tokens": 7050944} +{"current_steps": 14330, "total_steps": 37885, "loss": 0.0469, "lr": 1.5643491790294054e-06, "epoch": 1.8912498350270557, "percentage": 37.82, "elapsed_time": "0:20:55", "remaining_time": "0:34:23", "throughput": 5618.28, "total_tokens": 7053312} +{"current_steps": 14335, "total_steps": 37885, "loss": 0.0867, "lr": 1.5639687961776695e-06, "epoch": 1.891909726804804, "percentage": 37.84, "elapsed_time": "0:20:55", "remaining_time": "0:34:22", "throughput": 5618.89, "total_tokens": 7055936} +{"current_steps": 14340, "total_steps": 37885, "loss": 0.0129, "lr": 1.5635882936276403e-06, "epoch": 1.8925696185825525, "percentage": 37.85, "elapsed_time": "0:20:56", "remaining_time": "0:34:22", "throughput": 5619.58, "total_tokens": 7058688} +{"current_steps": 14345, "total_steps": 37885, "loss": 0.0382, "lr": 1.5632076714600773e-06, "epoch": 1.893229510360301, "percentage": 37.86, "elapsed_time": "0:20:56", "remaining_time": "0:34:21", "throughput": 5620.17, "total_tokens": 7061312} +{"current_steps": 14350, "total_steps": 37885, "loss": 0.0011, "lr": 1.5628269297557644e-06, "epoch": 1.8938894021380492, "percentage": 37.88, "elapsed_time": "0:20:56", "remaining_time": "0:34:21", "throughput": 5620.68, "total_tokens": 7063808} +{"current_steps": 14355, "total_steps": 37885, "loss": 0.1335, "lr": 1.5624460685955115e-06, "epoch": 1.894549293915798, "percentage": 37.89, "elapsed_time": "0:20:57", "remaining_time": "0:34:20", "throughput": 5621.25, "total_tokens": 7066368} +{"current_steps": 14360, "total_steps": 37885, "loss": 0.1012, "lr": 1.562065088060153e-06, "epoch": 1.8952091856935462, "percentage": 37.9, "elapsed_time": "0:20:57", "remaining_time": "0:34:19", "throughput": 5621.85, "total_tokens": 7068992} +{"current_steps": 14365, "total_steps": 37885, "loss": 0.0007, "lr": 1.5616839882305498e-06, "epoch": 1.8958690774712947, "percentage": 37.92, "elapsed_time": "0:20:57", "remaining_time": "0:34:19", "throughput": 5622.22, "total_tokens": 7071296} +{"current_steps": 14370, "total_steps": 37885, "loss": 0.0008, "lr": 1.5613027691875877e-06, "epoch": 1.8965289692490432, "percentage": 37.93, "elapsed_time": "0:20:58", "remaining_time": "0:34:18", "throughput": 5622.53, "total_tokens": 7073536} +{"current_steps": 14375, "total_steps": 37885, "loss": 0.1195, "lr": 1.5609214310121775e-06, "epoch": 1.8971888610267915, "percentage": 37.94, "elapsed_time": "0:20:58", "remaining_time": "0:34:18", "throughput": 5623.18, "total_tokens": 7076224} +{"current_steps": 14380, "total_steps": 37885, "loss": 0.0009, "lr": 1.5605399737852554e-06, "epoch": 1.89784875280454, "percentage": 37.96, "elapsed_time": "0:20:58", "remaining_time": "0:34:17", "throughput": 5623.5, "total_tokens": 7078464} +{"current_steps": 14385, "total_steps": 37885, "loss": 0.0817, "lr": 1.560158397587783e-06, "epoch": 1.8985086445822885, "percentage": 37.97, "elapsed_time": "0:20:59", "remaining_time": "0:34:16", "throughput": 5624.2, "total_tokens": 7081216} +{"current_steps": 14390, "total_steps": 37885, "loss": 0.1318, "lr": 1.559776702500747e-06, "epoch": 1.8991685363600368, "percentage": 37.98, "elapsed_time": "0:20:59", "remaining_time": "0:34:16", "throughput": 5624.65, "total_tokens": 7083648} +{"current_steps": 14395, "total_steps": 37885, "loss": 0.0004, "lr": 1.5593948886051592e-06, "epoch": 1.8998284281377855, "percentage": 38.0, "elapsed_time": "0:20:59", "remaining_time": "0:34:15", "throughput": 5625.02, "total_tokens": 7085952} +{"current_steps": 14400, "total_steps": 37885, "loss": 0.1321, "lr": 1.5590129559820575e-06, "epoch": 1.9004883199155338, "percentage": 38.01, "elapsed_time": "0:21:00", "remaining_time": "0:34:15", "throughput": 5625.67, "total_tokens": 7088640} +{"current_steps": 14405, "total_steps": 37885, "loss": 0.0028, "lr": 1.5586309047125039e-06, "epoch": 1.9011482116932823, "percentage": 38.02, "elapsed_time": "0:21:00", "remaining_time": "0:34:14", "throughput": 5626.42, "total_tokens": 7091456} +{"current_steps": 14410, "total_steps": 37885, "loss": 0.0625, "lr": 1.5582487348775862e-06, "epoch": 1.9018081034710308, "percentage": 38.04, "elapsed_time": "0:21:00", "remaining_time": "0:34:13", "throughput": 5626.93, "total_tokens": 7093952} +{"current_steps": 14415, "total_steps": 37885, "loss": 0.0348, "lr": 1.5578664465584168e-06, "epoch": 1.902467995248779, "percentage": 38.05, "elapsed_time": "0:21:01", "remaining_time": "0:34:13", "throughput": 5627.31, "total_tokens": 7096256} +{"current_steps": 14420, "total_steps": 37885, "loss": 0.1055, "lr": 1.5574840398361339e-06, "epoch": 1.9031278870265278, "percentage": 38.06, "elapsed_time": "0:21:01", "remaining_time": "0:34:12", "throughput": 5627.86, "total_tokens": 7098816} +{"current_steps": 14425, "total_steps": 37885, "loss": 0.0709, "lr": 1.5571015147919005e-06, "epoch": 1.903787778804276, "percentage": 38.08, "elapsed_time": "0:21:01", "remaining_time": "0:34:11", "throughput": 5628.19, "total_tokens": 7101056} +{"current_steps": 14430, "total_steps": 37885, "loss": 0.1621, "lr": 1.5567188715069048e-06, "epoch": 1.9044476705820246, "percentage": 38.09, "elapsed_time": "0:21:02", "remaining_time": "0:34:11", "throughput": 5628.66, "total_tokens": 7103488} +{"current_steps": 14435, "total_steps": 37885, "loss": 0.0179, "lr": 1.5563361100623604e-06, "epoch": 1.905107562359773, "percentage": 38.1, "elapsed_time": "0:21:02", "remaining_time": "0:34:10", "throughput": 5629.03, "total_tokens": 7105792} +{"current_steps": 14440, "total_steps": 37885, "loss": 0.0417, "lr": 1.555953230539505e-06, "epoch": 1.9057674541375214, "percentage": 38.12, "elapsed_time": "0:21:02", "remaining_time": "0:34:10", "throughput": 5629.31, "total_tokens": 7107968} +{"current_steps": 14445, "total_steps": 37885, "loss": 0.0521, "lr": 1.5555702330196021e-06, "epoch": 1.9064273459152699, "percentage": 38.13, "elapsed_time": "0:21:03", "remaining_time": "0:34:09", "throughput": 5629.91, "total_tokens": 7110592} +{"current_steps": 14450, "total_steps": 37885, "loss": 0.2691, "lr": 1.5551871175839406e-06, "epoch": 1.9070872376930184, "percentage": 38.14, "elapsed_time": "0:21:03", "remaining_time": "0:34:08", "throughput": 5630.13, "total_tokens": 7112704} +{"current_steps": 14455, "total_steps": 37885, "loss": 0.0585, "lr": 1.5548038843138338e-06, "epoch": 1.9077471294707666, "percentage": 38.15, "elapsed_time": "0:21:03", "remaining_time": "0:34:08", "throughput": 5630.64, "total_tokens": 7115200} +{"current_steps": 14460, "total_steps": 37885, "loss": 0.1164, "lr": 1.5544205332906201e-06, "epoch": 1.9084070212485154, "percentage": 38.17, "elapsed_time": "0:21:03", "remaining_time": "0:34:07", "throughput": 5631.11, "total_tokens": 7117632} +{"current_steps": 14465, "total_steps": 37885, "loss": 0.221, "lr": 1.554037064595663e-06, "epoch": 1.9090669130262636, "percentage": 38.18, "elapsed_time": "0:21:04", "remaining_time": "0:34:07", "throughput": 5631.58, "total_tokens": 7120064} +{"current_steps": 14470, "total_steps": 37885, "loss": 0.1352, "lr": 1.553653478310351e-06, "epoch": 1.9097268048040121, "percentage": 38.19, "elapsed_time": "0:21:04", "remaining_time": "0:34:06", "throughput": 5632.14, "total_tokens": 7122624} +{"current_steps": 14475, "total_steps": 37885, "loss": 0.0808, "lr": 1.5532697745160972e-06, "epoch": 1.9103866965817606, "percentage": 38.21, "elapsed_time": "0:21:04", "remaining_time": "0:34:05", "throughput": 5632.55, "total_tokens": 7124992} +{"current_steps": 14480, "total_steps": 37885, "loss": 0.0018, "lr": 1.5528859532943405e-06, "epoch": 1.911046588359509, "percentage": 38.22, "elapsed_time": "0:21:05", "remaining_time": "0:34:05", "throughput": 5633.01, "total_tokens": 7127424} +{"current_steps": 14485, "total_steps": 37885, "loss": 0.0209, "lr": 1.552502014726544e-06, "epoch": 1.9117064801372576, "percentage": 38.23, "elapsed_time": "0:21:05", "remaining_time": "0:34:04", "throughput": 5633.52, "total_tokens": 7129920} +{"current_steps": 14490, "total_steps": 37885, "loss": 0.0735, "lr": 1.5521179588941956e-06, "epoch": 1.912366371915006, "percentage": 38.25, "elapsed_time": "0:21:05", "remaining_time": "0:34:03", "throughput": 5633.97, "total_tokens": 7132352} +{"current_steps": 14495, "total_steps": 37885, "loss": 0.0481, "lr": 1.5517337858788087e-06, "epoch": 1.9130262636927544, "percentage": 38.26, "elapsed_time": "0:21:06", "remaining_time": "0:34:03", "throughput": 5634.48, "total_tokens": 7134848} +{"current_steps": 14500, "total_steps": 37885, "loss": 0.0019, "lr": 1.551349495761921e-06, "epoch": 1.913686155470503, "percentage": 38.27, "elapsed_time": "0:21:06", "remaining_time": "0:34:02", "throughput": 5634.96, "total_tokens": 7137344} +{"current_steps": 14505, "total_steps": 37885, "loss": 0.1998, "lr": 1.550965088625095e-06, "epoch": 1.9143460472482512, "percentage": 38.29, "elapsed_time": "0:21:06", "remaining_time": "0:34:02", "throughput": 5635.55, "total_tokens": 7139968} +{"current_steps": 14510, "total_steps": 37885, "loss": 0.0009, "lr": 1.5505805645499192e-06, "epoch": 1.9150059390259997, "percentage": 38.3, "elapsed_time": "0:21:07", "remaining_time": "0:34:01", "throughput": 5636.05, "total_tokens": 7142464} +{"current_steps": 14515, "total_steps": 37885, "loss": 0.0447, "lr": 1.5501959236180053e-06, "epoch": 1.9156658308037482, "percentage": 38.31, "elapsed_time": "0:21:07", "remaining_time": "0:34:00", "throughput": 5636.64, "total_tokens": 7145088} +{"current_steps": 14520, "total_steps": 37885, "loss": 0.0005, "lr": 1.5498111659109908e-06, "epoch": 1.9163257225814965, "percentage": 38.33, "elapsed_time": "0:21:07", "remaining_time": "0:34:00", "throughput": 5637.26, "total_tokens": 7147712} +{"current_steps": 14525, "total_steps": 37885, "loss": 0.1215, "lr": 1.549426291510538e-06, "epoch": 1.9169856143592452, "percentage": 38.34, "elapsed_time": "0:21:08", "remaining_time": "0:33:59", "throughput": 5637.73, "total_tokens": 7150144} +{"current_steps": 14530, "total_steps": 37885, "loss": 0.215, "lr": 1.5490413004983334e-06, "epoch": 1.9176455061369935, "percentage": 38.35, "elapsed_time": "0:21:08", "remaining_time": "0:33:59", "throughput": 5638.24, "total_tokens": 7152640} +{"current_steps": 14535, "total_steps": 37885, "loss": 0.1271, "lr": 1.5486561929560887e-06, "epoch": 1.918305397914742, "percentage": 38.37, "elapsed_time": "0:21:08", "remaining_time": "0:33:58", "throughput": 5638.78, "total_tokens": 7155200} +{"current_steps": 14540, "total_steps": 37885, "loss": 0.0219, "lr": 1.5482709689655398e-06, "epoch": 1.9189652896924905, "percentage": 38.38, "elapsed_time": "0:21:09", "remaining_time": "0:33:57", "throughput": 5639.04, "total_tokens": 7157376} +{"current_steps": 14545, "total_steps": 37885, "loss": 0.0957, "lr": 1.5478856286084483e-06, "epoch": 1.9196251814702388, "percentage": 38.39, "elapsed_time": "0:21:09", "remaining_time": "0:33:57", "throughput": 5639.64, "total_tokens": 7160000} +{"current_steps": 14550, "total_steps": 37885, "loss": 0.1187, "lr": 1.5475001719665994e-06, "epoch": 1.9202850732479875, "percentage": 38.41, "elapsed_time": "0:21:09", "remaining_time": "0:33:56", "throughput": 5640.11, "total_tokens": 7162432} +{"current_steps": 14555, "total_steps": 37885, "loss": 0.256, "lr": 1.5471145991218037e-06, "epoch": 1.9209449650257358, "percentage": 38.42, "elapsed_time": "0:21:10", "remaining_time": "0:33:56", "throughput": 5640.47, "total_tokens": 7164736} +{"current_steps": 14560, "total_steps": 37885, "loss": 0.0013, "lr": 1.5467289101558962e-06, "epoch": 1.9216048568034843, "percentage": 38.43, "elapsed_time": "0:21:10", "remaining_time": "0:33:55", "throughput": 5640.87, "total_tokens": 7167104} +{"current_steps": 14565, "total_steps": 37885, "loss": 0.0016, "lr": 1.5463431051507368e-06, "epoch": 1.9222647485812328, "percentage": 38.45, "elapsed_time": "0:21:10", "remaining_time": "0:33:54", "throughput": 5641.6, "total_tokens": 7169920} +{"current_steps": 14570, "total_steps": 37885, "loss": 0.0322, "lr": 1.5459571841882095e-06, "epoch": 1.922924640358981, "percentage": 38.46, "elapsed_time": "0:21:11", "remaining_time": "0:33:54", "throughput": 5641.97, "total_tokens": 7172224} +{"current_steps": 14575, "total_steps": 37885, "loss": 0.0529, "lr": 1.5455711473502233e-06, "epoch": 1.9235845321367295, "percentage": 38.47, "elapsed_time": "0:21:11", "remaining_time": "0:33:53", "throughput": 5642.57, "total_tokens": 7174848} +{"current_steps": 14580, "total_steps": 37885, "loss": 0.1527, "lr": 1.5451849947187121e-06, "epoch": 1.924244423914478, "percentage": 38.48, "elapsed_time": "0:21:11", "remaining_time": "0:33:53", "throughput": 5643.21, "total_tokens": 7177536} +{"current_steps": 14585, "total_steps": 37885, "loss": 0.1494, "lr": 1.5447987263756335e-06, "epoch": 1.9249043156922263, "percentage": 38.5, "elapsed_time": "0:21:12", "remaining_time": "0:33:52", "throughput": 5643.49, "total_tokens": 7179712} +{"current_steps": 14590, "total_steps": 37885, "loss": 0.0758, "lr": 1.5444123424029703e-06, "epoch": 1.925564207469975, "percentage": 38.51, "elapsed_time": "0:21:12", "remaining_time": "0:33:51", "throughput": 5643.85, "total_tokens": 7182016} +{"current_steps": 14595, "total_steps": 37885, "loss": 0.0535, "lr": 1.5440258428827298e-06, "epoch": 1.9262240992477233, "percentage": 38.52, "elapsed_time": "0:21:12", "remaining_time": "0:33:51", "throughput": 5644.44, "total_tokens": 7184640} +{"current_steps": 14600, "total_steps": 37885, "loss": 0.0964, "lr": 1.5436392278969438e-06, "epoch": 1.9268839910254718, "percentage": 38.54, "elapsed_time": "0:21:13", "remaining_time": "0:33:50", "throughput": 5644.89, "total_tokens": 7187072} +{"current_steps": 14605, "total_steps": 37885, "loss": 0.0184, "lr": 1.5432524975276681e-06, "epoch": 1.9275438828032203, "percentage": 38.55, "elapsed_time": "0:21:13", "remaining_time": "0:33:49", "throughput": 5645.21, "total_tokens": 7189312} +{"current_steps": 14610, "total_steps": 37885, "loss": 0.0008, "lr": 1.5428656518569838e-06, "epoch": 1.9282037745809686, "percentage": 38.56, "elapsed_time": "0:21:13", "remaining_time": "0:33:49", "throughput": 5645.63, "total_tokens": 7191680} +{"current_steps": 14615, "total_steps": 37885, "loss": 0.2284, "lr": 1.5424786909669962e-06, "epoch": 1.9288636663587173, "percentage": 38.58, "elapsed_time": "0:21:14", "remaining_time": "0:33:48", "throughput": 5646.04, "total_tokens": 7194048} +{"current_steps": 14620, "total_steps": 37885, "loss": 0.0006, "lr": 1.5420916149398346e-06, "epoch": 1.9295235581364656, "percentage": 38.59, "elapsed_time": "0:21:14", "remaining_time": "0:33:48", "throughput": 5646.72, "total_tokens": 7196800} +{"current_steps": 14625, "total_steps": 37885, "loss": 0.0009, "lr": 1.5417044238576533e-06, "epoch": 1.930183449914214, "percentage": 38.6, "elapsed_time": "0:21:14", "remaining_time": "0:33:47", "throughput": 5647.16, "total_tokens": 7199232} +{"current_steps": 14630, "total_steps": 37885, "loss": 0.1908, "lr": 1.5413171178026308e-06, "epoch": 1.9308433416919626, "percentage": 38.62, "elapsed_time": "0:21:15", "remaining_time": "0:33:46", "throughput": 5647.71, "total_tokens": 7201792} +{"current_steps": 14635, "total_steps": 37885, "loss": 0.073, "lr": 1.5409296968569698e-06, "epoch": 1.9315032334697109, "percentage": 38.63, "elapsed_time": "0:21:15", "remaining_time": "0:33:46", "throughput": 5648.25, "total_tokens": 7204352} +{"current_steps": 14640, "total_steps": 37885, "loss": 0.0874, "lr": 1.540542161102898e-06, "epoch": 1.9321631252474594, "percentage": 38.64, "elapsed_time": "0:21:15", "remaining_time": "0:33:45", "throughput": 5648.78, "total_tokens": 7206912} +{"current_steps": 14645, "total_steps": 37885, "loss": 0.0365, "lr": 1.5401545106226665e-06, "epoch": 1.9328230170252079, "percentage": 38.66, "elapsed_time": "0:21:16", "remaining_time": "0:33:45", "throughput": 5649.09, "total_tokens": 7209152} +{"current_steps": 14650, "total_steps": 37885, "loss": 0.1238, "lr": 1.539766745498552e-06, "epoch": 1.9334829088029564, "percentage": 38.67, "elapsed_time": "0:21:16", "remaining_time": "0:33:44", "throughput": 5649.65, "total_tokens": 7211712} +{"current_steps": 14655, "total_steps": 37885, "loss": 0.0643, "lr": 1.5393788658128542e-06, "epoch": 1.9341428005807049, "percentage": 38.68, "elapsed_time": "0:21:16", "remaining_time": "0:33:43", "throughput": 5650.1, "total_tokens": 7214144} +{"current_steps": 14660, "total_steps": 37885, "loss": 0.0293, "lr": 1.538990871647898e-06, "epoch": 1.9348026923584531, "percentage": 38.7, "elapsed_time": "0:21:17", "remaining_time": "0:33:43", "throughput": 5650.69, "total_tokens": 7216768} +{"current_steps": 14665, "total_steps": 37885, "loss": 0.05, "lr": 1.5386027630860324e-06, "epoch": 1.9354625841362016, "percentage": 38.71, "elapsed_time": "0:21:17", "remaining_time": "0:33:42", "throughput": 5651.14, "total_tokens": 7219200} +{"current_steps": 14670, "total_steps": 37885, "loss": 0.05, "lr": 1.5382145402096307e-06, "epoch": 1.9361224759139501, "percentage": 38.72, "elapsed_time": "0:21:17", "remaining_time": "0:33:42", "throughput": 5651.68, "total_tokens": 7221760} +{"current_steps": 14675, "total_steps": 37885, "loss": 0.0418, "lr": 1.53782620310109e-06, "epoch": 1.9367823676916984, "percentage": 38.74, "elapsed_time": "0:21:18", "remaining_time": "0:33:41", "throughput": 5652.13, "total_tokens": 7224192} +{"current_steps": 14680, "total_steps": 37885, "loss": 0.0005, "lr": 1.5374377518428324e-06, "epoch": 1.9374422594694471, "percentage": 38.75, "elapsed_time": "0:21:18", "remaining_time": "0:33:40", "throughput": 5652.53, "total_tokens": 7226560} +{"current_steps": 14685, "total_steps": 37885, "loss": 0.0937, "lr": 1.5370491865173042e-06, "epoch": 1.9381021512471954, "percentage": 38.76, "elapsed_time": "0:21:18", "remaining_time": "0:33:40", "throughput": 5653.02, "total_tokens": 7229056} +{"current_steps": 14690, "total_steps": 37885, "loss": 0.0438, "lr": 1.5366605072069747e-06, "epoch": 1.938762043024944, "percentage": 38.78, "elapsed_time": "0:21:19", "remaining_time": "0:33:39", "throughput": 5653.73, "total_tokens": 7231872} +{"current_steps": 14695, "total_steps": 37885, "loss": 0.0753, "lr": 1.5362717139943392e-06, "epoch": 1.9394219348026924, "percentage": 38.79, "elapsed_time": "0:21:19", "remaining_time": "0:33:39", "throughput": 5654.14, "total_tokens": 7234240} +{"current_steps": 14700, "total_steps": 37885, "loss": 0.2984, "lr": 1.5358828069619155e-06, "epoch": 1.9400818265804407, "percentage": 38.8, "elapsed_time": "0:21:19", "remaining_time": "0:33:38", "throughput": 5654.68, "total_tokens": 7236800} +{"current_steps": 14705, "total_steps": 37885, "loss": 0.0489, "lr": 1.5354937861922463e-06, "epoch": 1.9407417183581892, "percentage": 38.81, "elapsed_time": "0:21:20", "remaining_time": "0:33:37", "throughput": 5655.25, "total_tokens": 7239424} +{"current_steps": 14710, "total_steps": 37885, "loss": 0.2381, "lr": 1.5351046517678989e-06, "epoch": 1.9414016101359377, "percentage": 38.83, "elapsed_time": "0:21:20", "remaining_time": "0:33:37", "throughput": 5655.75, "total_tokens": 7241920} +{"current_steps": 14715, "total_steps": 37885, "loss": 0.1093, "lr": 1.534715403771464e-06, "epoch": 1.9420615019136862, "percentage": 38.84, "elapsed_time": "0:21:20", "remaining_time": "0:33:36", "throughput": 5656.06, "total_tokens": 7244160} +{"current_steps": 14720, "total_steps": 37885, "loss": 0.0006, "lr": 1.5343260422855573e-06, "epoch": 1.9427213936914347, "percentage": 38.85, "elapsed_time": "0:21:21", "remaining_time": "0:33:36", "throughput": 5656.51, "total_tokens": 7246592} +{"current_steps": 14725, "total_steps": 37885, "loss": 0.0014, "lr": 1.5339365673928168e-06, "epoch": 1.943381285469183, "percentage": 38.87, "elapsed_time": "0:21:21", "remaining_time": "0:33:35", "throughput": 5657.14, "total_tokens": 7249280} +{"current_steps": 14730, "total_steps": 37885, "loss": 0.0014, "lr": 1.5335469791759068e-06, "epoch": 1.9440411772469315, "percentage": 38.88, "elapsed_time": "0:21:21", "remaining_time": "0:33:34", "throughput": 5657.61, "total_tokens": 7251712} +{"current_steps": 14735, "total_steps": 37885, "loss": 0.0009, "lr": 1.5331572777175137e-06, "epoch": 1.94470106902468, "percentage": 38.89, "elapsed_time": "0:21:22", "remaining_time": "0:33:34", "throughput": 5658.06, "total_tokens": 7254144} +{"current_steps": 14740, "total_steps": 37885, "loss": 0.0006, "lr": 1.5327674631003493e-06, "epoch": 1.9453609608024283, "percentage": 38.91, "elapsed_time": "0:21:22", "remaining_time": "0:33:33", "throughput": 5658.56, "total_tokens": 7256640} +{"current_steps": 14745, "total_steps": 37885, "loss": 0.2108, "lr": 1.5323775354071491e-06, "epoch": 1.946020852580177, "percentage": 38.92, "elapsed_time": "0:21:22", "remaining_time": "0:33:33", "throughput": 5659.14, "total_tokens": 7259264} +{"current_steps": 14750, "total_steps": 37885, "loss": 0.0767, "lr": 1.531987494720672e-06, "epoch": 1.9466807443579253, "percentage": 38.93, "elapsed_time": "0:21:23", "remaining_time": "0:33:32", "throughput": 5659.64, "total_tokens": 7261760} +{"current_steps": 14755, "total_steps": 37885, "loss": 0.1239, "lr": 1.5315973411237016e-06, "epoch": 1.9473406361356738, "percentage": 38.95, "elapsed_time": "0:21:23", "remaining_time": "0:33:31", "throughput": 5660.04, "total_tokens": 7264128} +{"current_steps": 14760, "total_steps": 37885, "loss": 0.0343, "lr": 1.531207074699045e-06, "epoch": 1.9480005279134223, "percentage": 38.96, "elapsed_time": "0:21:23", "remaining_time": "0:33:31", "throughput": 5660.4, "total_tokens": 7266432} +{"current_steps": 14765, "total_steps": 37885, "loss": 0.2684, "lr": 1.5308166955295334e-06, "epoch": 1.9486604196911705, "percentage": 38.97, "elapsed_time": "0:21:24", "remaining_time": "0:33:30", "throughput": 5660.8, "total_tokens": 7268800} +{"current_steps": 14770, "total_steps": 37885, "loss": 0.0569, "lr": 1.5304262036980221e-06, "epoch": 1.949320311468919, "percentage": 38.99, "elapsed_time": "0:21:24", "remaining_time": "0:33:30", "throughput": 5661.42, "total_tokens": 7271488} +{"current_steps": 14775, "total_steps": 37885, "loss": 0.0474, "lr": 1.5300355992873903e-06, "epoch": 1.9499802032466675, "percentage": 39.0, "elapsed_time": "0:21:24", "remaining_time": "0:33:29", "throughput": 5661.9, "total_tokens": 7273984} +{"current_steps": 14780, "total_steps": 37885, "loss": 0.0888, "lr": 1.5296448823805407e-06, "epoch": 1.950640095024416, "percentage": 39.01, "elapsed_time": "0:21:25", "remaining_time": "0:33:28", "throughput": 5662.63, "total_tokens": 7276800} +{"current_steps": 14785, "total_steps": 37885, "loss": 0.001, "lr": 1.5292540530603998e-06, "epoch": 1.9512999868021645, "percentage": 39.03, "elapsed_time": "0:21:25", "remaining_time": "0:33:28", "throughput": 5663.33, "total_tokens": 7279616} +{"current_steps": 14790, "total_steps": 37885, "loss": 0.0512, "lr": 1.5288631114099193e-06, "epoch": 1.9519598785799128, "percentage": 39.04, "elapsed_time": "0:21:25", "remaining_time": "0:33:27", "throughput": 5663.64, "total_tokens": 7281856} +{"current_steps": 14795, "total_steps": 37885, "loss": 0.0006, "lr": 1.528472057512073e-06, "epoch": 1.9526197703576613, "percentage": 39.05, "elapsed_time": "0:21:26", "remaining_time": "0:33:27", "throughput": 5664.25, "total_tokens": 7284544} +{"current_steps": 14800, "total_steps": 37885, "loss": 0.0009, "lr": 1.5280808914498593e-06, "epoch": 1.9532796621354098, "percentage": 39.07, "elapsed_time": "0:21:26", "remaining_time": "0:33:26", "throughput": 5664.75, "total_tokens": 7287040} +{"current_steps": 14805, "total_steps": 37885, "loss": 0.0468, "lr": 1.5276896133063e-06, "epoch": 1.953939553913158, "percentage": 39.08, "elapsed_time": "0:21:26", "remaining_time": "0:33:25", "throughput": 5665.47, "total_tokens": 7289856} +{"current_steps": 14810, "total_steps": 37885, "loss": 0.1041, "lr": 1.5272982231644421e-06, "epoch": 1.9545994456909068, "percentage": 39.09, "elapsed_time": "0:21:27", "remaining_time": "0:33:25", "throughput": 5665.77, "total_tokens": 7292096} +{"current_steps": 14815, "total_steps": 37885, "loss": 0.1564, "lr": 1.5269067211073545e-06, "epoch": 1.955259337468655, "percentage": 39.11, "elapsed_time": "0:21:27", "remaining_time": "0:33:24", "throughput": 5666.35, "total_tokens": 7294720} +{"current_steps": 14820, "total_steps": 37885, "loss": 0.2679, "lr": 1.5265151072181309e-06, "epoch": 1.9559192292464036, "percentage": 39.12, "elapsed_time": "0:21:27", "remaining_time": "0:33:24", "throughput": 5666.94, "total_tokens": 7297344} +{"current_steps": 14825, "total_steps": 37885, "loss": 0.1056, "lr": 1.5261233815798886e-06, "epoch": 1.956579121024152, "percentage": 39.13, "elapsed_time": "0:21:28", "remaining_time": "0:33:23", "throughput": 5667.38, "total_tokens": 7299776} +{"current_steps": 14830, "total_steps": 37885, "loss": 0.0649, "lr": 1.5257315442757682e-06, "epoch": 1.9572390128019004, "percentage": 39.14, "elapsed_time": "0:21:28", "remaining_time": "0:33:22", "throughput": 5667.88, "total_tokens": 7302272} +{"current_steps": 14835, "total_steps": 37885, "loss": 0.0355, "lr": 1.5253395953889349e-06, "epoch": 1.957898904579649, "percentage": 39.16, "elapsed_time": "0:21:28", "remaining_time": "0:33:22", "throughput": 5668.22, "total_tokens": 7304576} +{"current_steps": 14840, "total_steps": 37885, "loss": 0.0764, "lr": 1.5249475350025764e-06, "epoch": 1.9585587963573974, "percentage": 39.17, "elapsed_time": "0:21:29", "remaining_time": "0:33:21", "throughput": 5668.97, "total_tokens": 7307456} +{"current_steps": 14845, "total_steps": 37885, "loss": 0.0557, "lr": 1.5245553631999054e-06, "epoch": 1.9592186881351459, "percentage": 39.18, "elapsed_time": "0:21:29", "remaining_time": "0:33:21", "throughput": 5669.36, "total_tokens": 7309824} +{"current_steps": 14850, "total_steps": 37885, "loss": 0.0007, "lr": 1.5241630800641567e-06, "epoch": 1.9598785799128944, "percentage": 39.2, "elapsed_time": "0:21:29", "remaining_time": "0:33:20", "throughput": 5669.77, "total_tokens": 7312192} +{"current_steps": 14855, "total_steps": 37885, "loss": 0.0836, "lr": 1.5237706856785898e-06, "epoch": 1.9605384716906427, "percentage": 39.21, "elapsed_time": "0:21:30", "remaining_time": "0:33:19", "throughput": 5670.07, "total_tokens": 7314432} +{"current_steps": 14860, "total_steps": 37885, "loss": 0.0521, "lr": 1.523378180126488e-06, "epoch": 1.9611983634683912, "percentage": 39.22, "elapsed_time": "0:21:30", "remaining_time": "0:33:19", "throughput": 5670.56, "total_tokens": 7316928} +{"current_steps": 14865, "total_steps": 37885, "loss": 0.0006, "lr": 1.5229855634911575e-06, "epoch": 1.9618582552461397, "percentage": 39.24, "elapsed_time": "0:21:30", "remaining_time": "0:33:18", "throughput": 5671.18, "total_tokens": 7319616} +{"current_steps": 14870, "total_steps": 37885, "loss": 0.002, "lr": 1.5225928358559283e-06, "epoch": 1.962518147023888, "percentage": 39.25, "elapsed_time": "0:21:30", "remaining_time": "0:33:18", "throughput": 5671.47, "total_tokens": 7321856} +{"current_steps": 14875, "total_steps": 37885, "loss": 0.083, "lr": 1.522199997304154e-06, "epoch": 1.9631780388016367, "percentage": 39.26, "elapsed_time": "0:21:31", "remaining_time": "0:33:17", "throughput": 5671.86, "total_tokens": 7324224} +{"current_steps": 14880, "total_steps": 37885, "loss": 0.047, "lr": 1.5218070479192118e-06, "epoch": 1.963837930579385, "percentage": 39.28, "elapsed_time": "0:21:31", "remaining_time": "0:33:16", "throughput": 5672.31, "total_tokens": 7326656} +{"current_steps": 14885, "total_steps": 37885, "loss": 0.3865, "lr": 1.521413987784502e-06, "epoch": 1.9644978223571334, "percentage": 39.29, "elapsed_time": "0:21:31", "remaining_time": "0:33:16", "throughput": 5672.89, "total_tokens": 7329280} +{"current_steps": 14890, "total_steps": 37885, "loss": 0.1041, "lr": 1.5210208169834496e-06, "epoch": 1.965157714134882, "percentage": 39.3, "elapsed_time": "0:21:32", "remaining_time": "0:33:15", "throughput": 5673.29, "total_tokens": 7331648} +{"current_steps": 14895, "total_steps": 37885, "loss": 0.0013, "lr": 1.5206275355995013e-06, "epoch": 1.9658176059126302, "percentage": 39.32, "elapsed_time": "0:21:32", "remaining_time": "0:33:15", "throughput": 5673.78, "total_tokens": 7334144} +{"current_steps": 14900, "total_steps": 37885, "loss": 0.0005, "lr": 1.5202341437161288e-06, "epoch": 1.966477497690379, "percentage": 39.33, "elapsed_time": "0:21:32", "remaining_time": "0:33:14", "throughput": 5674.27, "total_tokens": 7336640} +{"current_steps": 14905, "total_steps": 37885, "loss": 0.0005, "lr": 1.5198406414168266e-06, "epoch": 1.9671373894681272, "percentage": 39.34, "elapsed_time": "0:21:33", "remaining_time": "0:33:13", "throughput": 5674.71, "total_tokens": 7339072} +{"current_steps": 14910, "total_steps": 37885, "loss": 0.0003, "lr": 1.5194470287851124e-06, "epoch": 1.9677972812458757, "percentage": 39.36, "elapsed_time": "0:21:33", "remaining_time": "0:33:13", "throughput": 5675.15, "total_tokens": 7341504} +{"current_steps": 14915, "total_steps": 37885, "loss": 0.149, "lr": 1.5190533059045284e-06, "epoch": 1.9684571730236242, "percentage": 39.37, "elapsed_time": "0:21:33", "remaining_time": "0:33:12", "throughput": 5675.59, "total_tokens": 7343936} +{"current_steps": 14920, "total_steps": 37885, "loss": 0.1659, "lr": 1.5186594728586383e-06, "epoch": 1.9691170648013725, "percentage": 39.38, "elapsed_time": "0:21:34", "remaining_time": "0:33:12", "throughput": 5676.03, "total_tokens": 7346368} +{"current_steps": 14925, "total_steps": 37885, "loss": 0.0452, "lr": 1.518265529731031e-06, "epoch": 1.969776956579121, "percentage": 39.4, "elapsed_time": "0:21:34", "remaining_time": "0:33:11", "throughput": 5676.47, "total_tokens": 7348800} +{"current_steps": 14930, "total_steps": 37885, "loss": 0.1882, "lr": 1.5178714766053185e-06, "epoch": 1.9704368483568695, "percentage": 39.41, "elapsed_time": "0:21:34", "remaining_time": "0:33:10", "throughput": 5676.63, "total_tokens": 7350848} +{"current_steps": 14935, "total_steps": 37885, "loss": 0.1838, "lr": 1.5174773135651347e-06, "epoch": 1.9710967401346178, "percentage": 39.42, "elapsed_time": "0:21:35", "remaining_time": "0:33:10", "throughput": 5677.1, "total_tokens": 7353344} +{"current_steps": 14940, "total_steps": 37885, "loss": 0.0747, "lr": 1.5170830406941386e-06, "epoch": 1.9717566319123665, "percentage": 39.44, "elapsed_time": "0:21:35", "remaining_time": "0:33:09", "throughput": 5677.53, "total_tokens": 7355776} +{"current_steps": 14945, "total_steps": 37885, "loss": 0.003, "lr": 1.5166886580760114e-06, "epoch": 1.9724165236901148, "percentage": 39.45, "elapsed_time": "0:21:35", "remaining_time": "0:33:09", "throughput": 5678.33, "total_tokens": 7358720} +{"current_steps": 14950, "total_steps": 37885, "loss": 0.0494, "lr": 1.5162941657944584e-06, "epoch": 1.9730764154678633, "percentage": 39.46, "elapsed_time": "0:21:36", "remaining_time": "0:33:08", "throughput": 5678.81, "total_tokens": 7361216} +{"current_steps": 14955, "total_steps": 37885, "loss": 0.0556, "lr": 1.5158995639332073e-06, "epoch": 1.9737363072456118, "percentage": 39.47, "elapsed_time": "0:21:36", "remaining_time": "0:33:08", "throughput": 5679.43, "total_tokens": 7363904} +{"current_steps": 14960, "total_steps": 37885, "loss": 0.1777, "lr": 1.5155048525760095e-06, "epoch": 1.97439619902336, "percentage": 39.49, "elapsed_time": "0:21:36", "remaining_time": "0:33:07", "throughput": 5679.91, "total_tokens": 7366400} +{"current_steps": 14965, "total_steps": 37885, "loss": 0.0443, "lr": 1.5151100318066396e-06, "epoch": 1.9750560908011088, "percentage": 39.5, "elapsed_time": "0:21:37", "remaining_time": "0:33:06", "throughput": 5680.39, "total_tokens": 7368896} +{"current_steps": 14970, "total_steps": 37885, "loss": 0.1217, "lr": 1.5147151017088958e-06, "epoch": 1.975715982578857, "percentage": 39.51, "elapsed_time": "0:21:37", "remaining_time": "0:33:06", "throughput": 5680.87, "total_tokens": 7371392} +{"current_steps": 14975, "total_steps": 37885, "loss": 0.0033, "lr": 1.514320062366599e-06, "epoch": 1.9763758743566056, "percentage": 39.53, "elapsed_time": "0:21:37", "remaining_time": "0:33:05", "throughput": 5681.35, "total_tokens": 7373888} +{"current_steps": 14980, "total_steps": 37885, "loss": 0.0015, "lr": 1.513924913863593e-06, "epoch": 1.977035766134354, "percentage": 39.54, "elapsed_time": "0:21:38", "remaining_time": "0:33:05", "throughput": 5681.86, "total_tokens": 7376448} +{"current_steps": 14985, "total_steps": 37885, "loss": 0.1575, "lr": 1.513529656283746e-06, "epoch": 1.9776956579121023, "percentage": 39.55, "elapsed_time": "0:21:38", "remaining_time": "0:33:04", "throughput": 5682.12, "total_tokens": 7378624} +{"current_steps": 14990, "total_steps": 37885, "loss": 0.0303, "lr": 1.513134289710948e-06, "epoch": 1.9783555496898508, "percentage": 39.57, "elapsed_time": "0:21:38", "remaining_time": "0:33:03", "throughput": 5682.66, "total_tokens": 7381184} +{"current_steps": 14995, "total_steps": 37885, "loss": 0.0846, "lr": 1.5127388142291126e-06, "epoch": 1.9790154414675993, "percentage": 39.58, "elapsed_time": "0:21:39", "remaining_time": "0:33:03", "throughput": 5683.0, "total_tokens": 7383488} +{"current_steps": 15000, "total_steps": 37885, "loss": 0.0485, "lr": 1.5123432299221772e-06, "epoch": 1.9796753332453476, "percentage": 39.59, "elapsed_time": "0:21:39", "remaining_time": "0:33:02", "throughput": 5683.35, "total_tokens": 7385792} +{"current_steps": 15005, "total_steps": 37885, "loss": 0.0013, "lr": 1.5119475368741013e-06, "epoch": 1.9803352250230963, "percentage": 39.61, "elapsed_time": "0:21:39", "remaining_time": "0:33:02", "throughput": 5683.91, "total_tokens": 7388416} +{"current_steps": 15010, "total_steps": 37885, "loss": 0.0646, "lr": 1.5115517351688679e-06, "epoch": 1.9809951168008446, "percentage": 39.62, "elapsed_time": "0:21:40", "remaining_time": "0:33:01", "throughput": 5684.26, "total_tokens": 7390720} +{"current_steps": 15015, "total_steps": 37885, "loss": 0.0839, "lr": 1.5111558248904829e-06, "epoch": 1.9816550085785931, "percentage": 39.63, "elapsed_time": "0:21:40", "remaining_time": "0:33:00", "throughput": 5684.7, "total_tokens": 7393152} +{"current_steps": 15020, "total_steps": 37885, "loss": 0.1332, "lr": 1.5107598061229755e-06, "epoch": 1.9823149003563416, "percentage": 39.65, "elapsed_time": "0:21:40", "remaining_time": "0:33:00", "throughput": 5684.94, "total_tokens": 7395328} +{"current_steps": 15025, "total_steps": 37885, "loss": 0.1803, "lr": 1.510363678950398e-06, "epoch": 1.98297479213409, "percentage": 39.66, "elapsed_time": "0:21:41", "remaining_time": "0:32:59", "throughput": 5685.34, "total_tokens": 7397696} +{"current_steps": 15030, "total_steps": 37885, "loss": 0.0009, "lr": 1.509967443456826e-06, "epoch": 1.9836346839118386, "percentage": 39.67, "elapsed_time": "0:21:41", "remaining_time": "0:32:59", "throughput": 5685.63, "total_tokens": 7399936} +{"current_steps": 15035, "total_steps": 37885, "loss": 0.0032, "lr": 1.5095710997263562e-06, "epoch": 1.984294575689587, "percentage": 39.69, "elapsed_time": "0:21:41", "remaining_time": "0:32:58", "throughput": 5685.83, "total_tokens": 7402048} +{"current_steps": 15040, "total_steps": 37885, "loss": 0.0739, "lr": 1.509174647843111e-06, "epoch": 1.9849544674673354, "percentage": 39.7, "elapsed_time": "0:21:42", "remaining_time": "0:32:57", "throughput": 5686.13, "total_tokens": 7404288} +{"current_steps": 15045, "total_steps": 37885, "loss": 0.0578, "lr": 1.5087780878912335e-06, "epoch": 1.985614359245084, "percentage": 39.71, "elapsed_time": "0:21:42", "remaining_time": "0:32:57", "throughput": 5686.42, "total_tokens": 7406528} +{"current_steps": 15050, "total_steps": 37885, "loss": 0.0584, "lr": 1.5083814199548912e-06, "epoch": 1.9862742510228322, "percentage": 39.73, "elapsed_time": "0:21:42", "remaining_time": "0:32:56", "throughput": 5686.76, "total_tokens": 7408832} +{"current_steps": 15055, "total_steps": 37885, "loss": 0.144, "lr": 1.5079846441182744e-06, "epoch": 1.9869341428005807, "percentage": 39.74, "elapsed_time": "0:21:43", "remaining_time": "0:32:56", "throughput": 5687.43, "total_tokens": 7411584} +{"current_steps": 15060, "total_steps": 37885, "loss": 0.0525, "lr": 1.5075877604655948e-06, "epoch": 1.9875940345783292, "percentage": 39.75, "elapsed_time": "0:21:43", "remaining_time": "0:32:55", "throughput": 5687.91, "total_tokens": 7414080} +{"current_steps": 15065, "total_steps": 37885, "loss": 0.0009, "lr": 1.5071907690810892e-06, "epoch": 1.9882539263560775, "percentage": 39.77, "elapsed_time": "0:21:43", "remaining_time": "0:32:54", "throughput": 5688.24, "total_tokens": 7416384} +{"current_steps": 15070, "total_steps": 37885, "loss": 0.1545, "lr": 1.5067936700490153e-06, "epoch": 1.9889138181338262, "percentage": 39.78, "elapsed_time": "0:21:44", "remaining_time": "0:32:54", "throughput": 5688.62, "total_tokens": 7418752} +{"current_steps": 15075, "total_steps": 37885, "loss": 0.0006, "lr": 1.5063964634536553e-06, "epoch": 1.9895737099115745, "percentage": 39.79, "elapsed_time": "0:21:44", "remaining_time": "0:32:53", "throughput": 5688.92, "total_tokens": 7420992} +{"current_steps": 15080, "total_steps": 37885, "loss": 0.2178, "lr": 1.5059991493793124e-06, "epoch": 1.990233601689323, "percentage": 39.8, "elapsed_time": "0:21:44", "remaining_time": "0:32:53", "throughput": 5689.37, "total_tokens": 7423488} +{"current_steps": 15085, "total_steps": 37885, "loss": 0.095, "lr": 1.5056017279103146e-06, "epoch": 1.9908934934670715, "percentage": 39.82, "elapsed_time": "0:21:45", "remaining_time": "0:32:52", "throughput": 5689.82, "total_tokens": 7425920} +{"current_steps": 15090, "total_steps": 37885, "loss": 0.0005, "lr": 1.505204199131011e-06, "epoch": 1.9915533852448197, "percentage": 39.83, "elapsed_time": "0:21:45", "remaining_time": "0:32:52", "throughput": 5690.42, "total_tokens": 7428608} +{"current_steps": 15095, "total_steps": 37885, "loss": 0.045, "lr": 1.5048065631257748e-06, "epoch": 1.9922132770225685, "percentage": 39.84, "elapsed_time": "0:21:45", "remaining_time": "0:32:51", "throughput": 5690.94, "total_tokens": 7431168} +{"current_steps": 15100, "total_steps": 37885, "loss": 0.0011, "lr": 1.5044088199790012e-06, "epoch": 1.9928731688003167, "percentage": 39.86, "elapsed_time": "0:21:46", "remaining_time": "0:32:50", "throughput": 5691.46, "total_tokens": 7433728} +{"current_steps": 15105, "total_steps": 37885, "loss": 0.1167, "lr": 1.5040109697751082e-06, "epoch": 1.9935330605780652, "percentage": 39.87, "elapsed_time": "0:21:46", "remaining_time": "0:32:50", "throughput": 5692.02, "total_tokens": 7436352} +{"current_steps": 15110, "total_steps": 37885, "loss": 0.1322, "lr": 1.5036130125985364e-06, "epoch": 1.9941929523558137, "percentage": 39.88, "elapsed_time": "0:21:46", "remaining_time": "0:32:49", "throughput": 5692.46, "total_tokens": 7438784} +{"current_steps": 15115, "total_steps": 37885, "loss": 0.0004, "lr": 1.5032149485337494e-06, "epoch": 1.994852844133562, "percentage": 39.9, "elapsed_time": "0:21:47", "remaining_time": "0:32:49", "throughput": 5692.85, "total_tokens": 7441152} +{"current_steps": 15120, "total_steps": 37885, "loss": 0.1143, "lr": 1.5028167776652339e-06, "epoch": 1.9955127359113105, "percentage": 39.91, "elapsed_time": "0:21:47", "remaining_time": "0:32:48", "throughput": 5693.19, "total_tokens": 7443456} +{"current_steps": 15125, "total_steps": 37885, "loss": 0.0071, "lr": 1.5024185000774984e-06, "epoch": 1.996172627689059, "percentage": 39.92, "elapsed_time": "0:21:47", "remaining_time": "0:32:47", "throughput": 5693.73, "total_tokens": 7446016} +{"current_steps": 15130, "total_steps": 37885, "loss": 0.172, "lr": 1.5020201158550745e-06, "epoch": 1.9968325194668073, "percentage": 39.94, "elapsed_time": "0:21:48", "remaining_time": "0:32:47", "throughput": 5694.17, "total_tokens": 7448448} +{"current_steps": 15135, "total_steps": 37885, "loss": 0.1166, "lr": 1.5016216250825164e-06, "epoch": 1.997492411244556, "percentage": 39.95, "elapsed_time": "0:21:48", "remaining_time": "0:32:46", "throughput": 5694.69, "total_tokens": 7451008} +{"current_steps": 15140, "total_steps": 37885, "loss": 0.0687, "lr": 1.5012230278444005e-06, "epoch": 1.9981523030223043, "percentage": 39.96, "elapsed_time": "0:21:48", "remaining_time": "0:32:46", "throughput": 5695.17, "total_tokens": 7453504} +{"current_steps": 15145, "total_steps": 37885, "loss": 0.0024, "lr": 1.5008243242253269e-06, "epoch": 1.9988121948000528, "percentage": 39.98, "elapsed_time": "0:21:49", "remaining_time": "0:32:45", "throughput": 5695.64, "total_tokens": 7456000} +{"current_steps": 15150, "total_steps": 37885, "loss": 0.0027, "lr": 1.5004255143099167e-06, "epoch": 1.9994720865778013, "percentage": 39.99, "elapsed_time": "0:21:49", "remaining_time": "0:32:44", "throughput": 5696.13, "total_tokens": 7458496} +{"current_steps": 15155, "total_steps": 37885, "loss": 0.0349, "lr": 1.5000265981828153e-06, "epoch": 2.0001319783555496, "percentage": 40.0, "elapsed_time": "0:21:49", "remaining_time": "0:32:44", "throughput": 5696.1, "total_tokens": 7460784} +{"current_steps": 15160, "total_steps": 37885, "loss": 0.0004, "lr": 1.4996275759286894e-06, "epoch": 2.0007918701332983, "percentage": 40.02, "elapsed_time": "0:21:50", "remaining_time": "0:32:43", "throughput": 5696.39, "total_tokens": 7463024} +{"current_steps": 15160, "total_steps": 37885, "eval_loss": 0.1182011216878891, "epoch": 2.0007918701332983, "percentage": 40.02, "elapsed_time": "0:21:58", "remaining_time": "0:32:55", "throughput": 5661.91, "total_tokens": 7463024} +{"current_steps": 15165, "total_steps": 37885, "loss": 0.0012, "lr": 1.4992284476322283e-06, "epoch": 2.0014517619110466, "percentage": 40.03, "elapsed_time": "0:22:38", "remaining_time": "0:33:55", "throughput": 5496.1, "total_tokens": 7465456} +{"current_steps": 15170, "total_steps": 37885, "loss": 0.0007, "lr": 1.4988292133781445e-06, "epoch": 2.002111653688795, "percentage": 40.04, "elapsed_time": "0:22:38", "remaining_time": "0:33:54", "throughput": 5496.39, "total_tokens": 7467632} +{"current_steps": 15175, "total_steps": 37885, "loss": 0.0406, "lr": 1.498429873251172e-06, "epoch": 2.0027715454665436, "percentage": 40.06, "elapsed_time": "0:22:38", "remaining_time": "0:33:53", "throughput": 5496.93, "total_tokens": 7470192} +{"current_steps": 15180, "total_steps": 37885, "loss": 0.1068, "lr": 1.4980304273360686e-06, "epoch": 2.003431437244292, "percentage": 40.07, "elapsed_time": "0:22:39", "remaining_time": "0:33:53", "throughput": 5497.25, "total_tokens": 7472432} +{"current_steps": 15185, "total_steps": 37885, "loss": 0.0002, "lr": 1.4976308757176135e-06, "epoch": 2.0040913290220406, "percentage": 40.08, "elapsed_time": "0:22:39", "remaining_time": "0:33:52", "throughput": 5497.75, "total_tokens": 7474928} +{"current_steps": 15190, "total_steps": 37885, "loss": 0.0004, "lr": 1.4972312184806084e-06, "epoch": 2.004751220799789, "percentage": 40.1, "elapsed_time": "0:22:39", "remaining_time": "0:33:51", "throughput": 5498.17, "total_tokens": 7477296} +{"current_steps": 15195, "total_steps": 37885, "loss": 0.2538, "lr": 1.496831455709878e-06, "epoch": 2.005411112577537, "percentage": 40.11, "elapsed_time": "0:22:40", "remaining_time": "0:33:51", "throughput": 5498.61, "total_tokens": 7479728} +{"current_steps": 15200, "total_steps": 37885, "loss": 0.0003, "lr": 1.4964315874902687e-06, "epoch": 2.006071004355286, "percentage": 40.12, "elapsed_time": "0:22:40", "remaining_time": "0:33:50", "throughput": 5499.02, "total_tokens": 7482096} +{"current_steps": 15205, "total_steps": 37885, "loss": 0.0712, "lr": 1.49603161390665e-06, "epoch": 2.006730896133034, "percentage": 40.13, "elapsed_time": "0:22:40", "remaining_time": "0:33:50", "throughput": 5499.38, "total_tokens": 7484400} +{"current_steps": 15210, "total_steps": 37885, "loss": 0.0005, "lr": 1.495631535043913e-06, "epoch": 2.007390787910783, "percentage": 40.15, "elapsed_time": "0:22:41", "remaining_time": "0:33:49", "throughput": 5499.76, "total_tokens": 7486704} +{"current_steps": 15215, "total_steps": 37885, "loss": 0.0001, "lr": 1.4952313509869722e-06, "epoch": 2.008050679688531, "percentage": 40.16, "elapsed_time": "0:22:41", "remaining_time": "0:33:48", "throughput": 5500.27, "total_tokens": 7489200} +{"current_steps": 15220, "total_steps": 37885, "loss": 0.0876, "lr": 1.4948310618207628e-06, "epoch": 2.0087105714662794, "percentage": 40.17, "elapsed_time": "0:22:41", "remaining_time": "0:33:48", "throughput": 5500.65, "total_tokens": 7491504} +{"current_steps": 15225, "total_steps": 37885, "loss": 0.0002, "lr": 1.4944306676302442e-06, "epoch": 2.009370463244028, "percentage": 40.19, "elapsed_time": "0:22:42", "remaining_time": "0:33:47", "throughput": 5501.07, "total_tokens": 7493872} +{"current_steps": 15230, "total_steps": 37885, "loss": 0.0005, "lr": 1.4940301685003967e-06, "epoch": 2.0100303550217764, "percentage": 40.2, "elapsed_time": "0:22:42", "remaining_time": "0:33:46", "throughput": 5501.42, "total_tokens": 7496176} +{"current_steps": 15235, "total_steps": 37885, "loss": 0.0935, "lr": 1.4936295645162232e-06, "epoch": 2.0106902467995247, "percentage": 40.21, "elapsed_time": "0:22:42", "remaining_time": "0:33:46", "throughput": 5502.05, "total_tokens": 7498864} +{"current_steps": 15240, "total_steps": 37885, "loss": 0.0003, "lr": 1.4932288557627497e-06, "epoch": 2.0113501385772734, "percentage": 40.23, "elapsed_time": "0:22:43", "remaining_time": "0:33:45", "throughput": 5502.52, "total_tokens": 7501296} +{"current_steps": 15245, "total_steps": 37885, "loss": 0.0768, "lr": 1.4928280423250228e-06, "epoch": 2.0120100303550217, "percentage": 40.24, "elapsed_time": "0:22:43", "remaining_time": "0:33:45", "throughput": 5503.06, "total_tokens": 7503856} +{"current_steps": 15250, "total_steps": 37885, "loss": 0.0711, "lr": 1.4924271242881128e-06, "epoch": 2.0126699221327704, "percentage": 40.25, "elapsed_time": "0:22:43", "remaining_time": "0:33:44", "throughput": 5503.56, "total_tokens": 7506352} +{"current_steps": 15255, "total_steps": 37885, "loss": 0.0002, "lr": 1.4920261017371116e-06, "epoch": 2.0133298139105187, "percentage": 40.27, "elapsed_time": "0:22:44", "remaining_time": "0:33:43", "throughput": 5504.1, "total_tokens": 7508912} +{"current_steps": 15260, "total_steps": 37885, "loss": 0.0002, "lr": 1.4916249747571333e-06, "epoch": 2.013989705688267, "percentage": 40.28, "elapsed_time": "0:22:44", "remaining_time": "0:33:43", "throughput": 5504.51, "total_tokens": 7511280} +{"current_steps": 15265, "total_steps": 37885, "loss": 0.0789, "lr": 1.4912237434333142e-06, "epoch": 2.0146495974660157, "percentage": 40.29, "elapsed_time": "0:22:44", "remaining_time": "0:33:42", "throughput": 5505.11, "total_tokens": 7513904} +{"current_steps": 15270, "total_steps": 37885, "loss": 0.0778, "lr": 1.4908224078508125e-06, "epoch": 2.015309489243764, "percentage": 40.31, "elapsed_time": "0:22:45", "remaining_time": "0:33:41", "throughput": 5505.81, "total_tokens": 7516720} +{"current_steps": 15275, "total_steps": 37885, "loss": 0.0883, "lr": 1.4904209680948092e-06, "epoch": 2.0159693810215127, "percentage": 40.32, "elapsed_time": "0:22:45", "remaining_time": "0:33:41", "throughput": 5506.27, "total_tokens": 7519152} +{"current_steps": 15280, "total_steps": 37885, "loss": 0.0014, "lr": 1.4900194242505067e-06, "epoch": 2.016629272799261, "percentage": 40.33, "elapsed_time": "0:22:45", "remaining_time": "0:33:40", "throughput": 5506.76, "total_tokens": 7521648} +{"current_steps": 15285, "total_steps": 37885, "loss": 0.0005, "lr": 1.48961777640313e-06, "epoch": 2.0172891645770092, "percentage": 40.35, "elapsed_time": "0:22:46", "remaining_time": "0:33:40", "throughput": 5507.27, "total_tokens": 7524144} +{"current_steps": 15290, "total_steps": 37885, "loss": 0.0538, "lr": 1.4892160246379257e-06, "epoch": 2.017949056354758, "percentage": 40.36, "elapsed_time": "0:22:46", "remaining_time": "0:33:39", "throughput": 5507.81, "total_tokens": 7526704} +{"current_steps": 15295, "total_steps": 37885, "loss": 0.0006, "lr": 1.4888141690401628e-06, "epoch": 2.0186089481325062, "percentage": 40.37, "elapsed_time": "0:22:46", "remaining_time": "0:33:38", "throughput": 5508.35, "total_tokens": 7529264} +{"current_steps": 15300, "total_steps": 37885, "loss": 0.0493, "lr": 1.488412209695132e-06, "epoch": 2.0192688399102545, "percentage": 40.39, "elapsed_time": "0:22:47", "remaining_time": "0:33:38", "throughput": 5508.81, "total_tokens": 7531696} +{"current_steps": 15305, "total_steps": 37885, "loss": 0.0108, "lr": 1.4880101466881464e-06, "epoch": 2.0199287316880032, "percentage": 40.4, "elapsed_time": "0:22:47", "remaining_time": "0:33:37", "throughput": 5509.08, "total_tokens": 7533872} +{"current_steps": 15310, "total_steps": 37885, "loss": 0.0706, "lr": 1.4876079801045418e-06, "epoch": 2.0205886234657515, "percentage": 40.41, "elapsed_time": "0:22:47", "remaining_time": "0:33:36", "throughput": 5509.27, "total_tokens": 7535920} +{"current_steps": 15315, "total_steps": 37885, "loss": 0.0015, "lr": 1.4872057100296738e-06, "epoch": 2.0212485152435002, "percentage": 40.42, "elapsed_time": "0:22:48", "remaining_time": "0:33:36", "throughput": 5509.68, "total_tokens": 7538288} +{"current_steps": 15320, "total_steps": 37885, "loss": 0.0009, "lr": 1.4868033365489222e-06, "epoch": 2.0219084070212485, "percentage": 40.44, "elapsed_time": "0:22:48", "remaining_time": "0:33:35", "throughput": 5510.0, "total_tokens": 7540528} +{"current_steps": 15325, "total_steps": 37885, "loss": 0.0521, "lr": 1.4864008597476873e-06, "epoch": 2.022568298798997, "percentage": 40.45, "elapsed_time": "0:22:48", "remaining_time": "0:33:35", "throughput": 5510.49, "total_tokens": 7543024} +{"current_steps": 15330, "total_steps": 37885, "loss": 0.0005, "lr": 1.4859982797113923e-06, "epoch": 2.0232281905767455, "percentage": 40.46, "elapsed_time": "0:22:49", "remaining_time": "0:33:34", "throughput": 5510.9, "total_tokens": 7545392} +{"current_steps": 15335, "total_steps": 37885, "loss": 0.0533, "lr": 1.4855955965254816e-06, "epoch": 2.023888082354494, "percentage": 40.48, "elapsed_time": "0:22:49", "remaining_time": "0:33:33", "throughput": 5511.21, "total_tokens": 7547632} +{"current_steps": 15340, "total_steps": 37885, "loss": 0.0006, "lr": 1.485192810275422e-06, "epoch": 2.0245479741322425, "percentage": 40.49, "elapsed_time": "0:22:49", "remaining_time": "0:33:33", "throughput": 5511.74, "total_tokens": 7550192} +{"current_steps": 15345, "total_steps": 37885, "loss": 0.096, "lr": 1.4847899210467021e-06, "epoch": 2.025207865909991, "percentage": 40.5, "elapsed_time": "0:22:50", "remaining_time": "0:33:32", "throughput": 5512.23, "total_tokens": 7552688} +{"current_steps": 15350, "total_steps": 37885, "loss": 0.1187, "lr": 1.4843869289248318e-06, "epoch": 2.025867757687739, "percentage": 40.52, "elapsed_time": "0:22:50", "remaining_time": "0:33:32", "throughput": 5512.81, "total_tokens": 7555312} +{"current_steps": 15355, "total_steps": 37885, "loss": 0.0001, "lr": 1.483983833995344e-06, "epoch": 2.026527649465488, "percentage": 40.53, "elapsed_time": "0:22:50", "remaining_time": "0:33:31", "throughput": 5513.48, "total_tokens": 7558128} +{"current_steps": 15360, "total_steps": 37885, "loss": 0.0011, "lr": 1.4835806363437915e-06, "epoch": 2.027187541243236, "percentage": 40.54, "elapsed_time": "0:22:51", "remaining_time": "0:33:30", "throughput": 5513.77, "total_tokens": 7560368} +{"current_steps": 15365, "total_steps": 37885, "loss": 0.0002, "lr": 1.4831773360557513e-06, "epoch": 2.0278474330209844, "percentage": 40.56, "elapsed_time": "0:22:51", "remaining_time": "0:33:30", "throughput": 5514.18, "total_tokens": 7562736} +{"current_steps": 15370, "total_steps": 37885, "loss": 0.0009, "lr": 1.4827739332168201e-06, "epoch": 2.028507324798733, "percentage": 40.57, "elapsed_time": "0:22:51", "remaining_time": "0:33:29", "throughput": 5514.79, "total_tokens": 7565424} +{"current_steps": 15375, "total_steps": 37885, "loss": 0.0006, "lr": 1.4823704279126172e-06, "epoch": 2.0291672165764814, "percentage": 40.58, "elapsed_time": "0:22:52", "remaining_time": "0:33:28", "throughput": 5515.23, "total_tokens": 7567856} +{"current_steps": 15380, "total_steps": 37885, "loss": 0.0433, "lr": 1.4819668202287847e-06, "epoch": 2.02982710835423, "percentage": 40.6, "elapsed_time": "0:22:52", "remaining_time": "0:33:28", "throughput": 5515.72, "total_tokens": 7570352} +{"current_steps": 15385, "total_steps": 37885, "loss": 0.0006, "lr": 1.4815631102509843e-06, "epoch": 2.0304870001319784, "percentage": 40.61, "elapsed_time": "0:22:52", "remaining_time": "0:33:27", "throughput": 5516.12, "total_tokens": 7572720} +{"current_steps": 15390, "total_steps": 37885, "loss": 0.0006, "lr": 1.4811592980649014e-06, "epoch": 2.0311468919097266, "percentage": 40.62, "elapsed_time": "0:22:53", "remaining_time": "0:33:27", "throughput": 5516.44, "total_tokens": 7574960} +{"current_steps": 15395, "total_steps": 37885, "loss": 0.0002, "lr": 1.4807553837562415e-06, "epoch": 2.0318067836874754, "percentage": 40.64, "elapsed_time": "0:22:53", "remaining_time": "0:33:26", "throughput": 5517.02, "total_tokens": 7577584} +{"current_steps": 15400, "total_steps": 37885, "loss": 0.0, "lr": 1.4803513674107325e-06, "epoch": 2.0324666754652236, "percentage": 40.65, "elapsed_time": "0:22:53", "remaining_time": "0:33:25", "throughput": 5517.48, "total_tokens": 7580016} +{"current_steps": 15405, "total_steps": 37885, "loss": 0.0846, "lr": 1.4799472491141245e-06, "epoch": 2.0331265672429724, "percentage": 40.66, "elapsed_time": "0:22:54", "remaining_time": "0:33:25", "throughput": 5517.84, "total_tokens": 7582320} +{"current_steps": 15410, "total_steps": 37885, "loss": 0.0432, "lr": 1.4795430289521885e-06, "epoch": 2.0337864590207206, "percentage": 40.68, "elapsed_time": "0:22:54", "remaining_time": "0:33:24", "throughput": 5518.45, "total_tokens": 7585008} +{"current_steps": 15415, "total_steps": 37885, "loss": 0.0021, "lr": 1.479138707010717e-06, "epoch": 2.034446350798469, "percentage": 40.69, "elapsed_time": "0:22:54", "remaining_time": "0:33:24", "throughput": 5519.11, "total_tokens": 7587760} +{"current_steps": 15420, "total_steps": 37885, "loss": 0.0004, "lr": 1.4787342833755245e-06, "epoch": 2.0351062425762176, "percentage": 40.7, "elapsed_time": "0:22:55", "remaining_time": "0:33:23", "throughput": 5519.61, "total_tokens": 7590256} +{"current_steps": 15425, "total_steps": 37885, "loss": 0.0977, "lr": 1.4783297581324472e-06, "epoch": 2.035766134353966, "percentage": 40.72, "elapsed_time": "0:22:55", "remaining_time": "0:33:22", "throughput": 5520.08, "total_tokens": 7592688} +{"current_steps": 15430, "total_steps": 37885, "loss": 0.0257, "lr": 1.4779251313673422e-06, "epoch": 2.036426026131714, "percentage": 40.73, "elapsed_time": "0:22:55", "remaining_time": "0:33:22", "throughput": 5520.51, "total_tokens": 7595120} +{"current_steps": 15435, "total_steps": 37885, "loss": 0.0004, "lr": 1.4775204031660894e-06, "epoch": 2.037085917909463, "percentage": 40.74, "elapsed_time": "0:22:56", "remaining_time": "0:33:21", "throughput": 5521.01, "total_tokens": 7597680} +{"current_steps": 15440, "total_steps": 37885, "loss": 0.0001, "lr": 1.4771155736145888e-06, "epoch": 2.037745809687211, "percentage": 40.75, "elapsed_time": "0:22:56", "remaining_time": "0:33:20", "throughput": 5521.55, "total_tokens": 7600304} +{"current_steps": 15445, "total_steps": 37885, "loss": 0.001, "lr": 1.4767106427987625e-06, "epoch": 2.03840570146496, "percentage": 40.77, "elapsed_time": "0:22:56", "remaining_time": "0:33:20", "throughput": 5522.02, "total_tokens": 7602800} +{"current_steps": 15450, "total_steps": 37885, "loss": 0.0798, "lr": 1.4763056108045549e-06, "epoch": 2.039065593242708, "percentage": 40.78, "elapsed_time": "0:22:57", "remaining_time": "0:33:19", "throughput": 5522.31, "total_tokens": 7605040} +{"current_steps": 15455, "total_steps": 37885, "loss": 0.0007, "lr": 1.4759004777179297e-06, "epoch": 2.0397254850204565, "percentage": 40.79, "elapsed_time": "0:22:57", "remaining_time": "0:33:19", "throughput": 5522.7, "total_tokens": 7607408} +{"current_steps": 15460, "total_steps": 37885, "loss": 0.0612, "lr": 1.475495243624875e-06, "epoch": 2.040385376798205, "percentage": 40.81, "elapsed_time": "0:22:57", "remaining_time": "0:33:18", "throughput": 5523.17, "total_tokens": 7609904} +{"current_steps": 15465, "total_steps": 37885, "loss": 0.0879, "lr": 1.475089908611398e-06, "epoch": 2.0410452685759535, "percentage": 40.82, "elapsed_time": "0:22:58", "remaining_time": "0:33:17", "throughput": 5523.77, "total_tokens": 7612592} +{"current_steps": 15470, "total_steps": 37885, "loss": 0.0002, "lr": 1.4746844727635282e-06, "epoch": 2.041705160353702, "percentage": 40.83, "elapsed_time": "0:22:58", "remaining_time": "0:33:17", "throughput": 5524.29, "total_tokens": 7615152} +{"current_steps": 15475, "total_steps": 37885, "loss": 0.1432, "lr": 1.474278936167316e-06, "epoch": 2.0423650521314505, "percentage": 40.85, "elapsed_time": "0:22:58", "remaining_time": "0:33:16", "throughput": 5524.71, "total_tokens": 7617584} +{"current_steps": 15480, "total_steps": 37885, "loss": 0.0, "lr": 1.4738732989088347e-06, "epoch": 2.0430249439091988, "percentage": 40.86, "elapsed_time": "0:22:59", "remaining_time": "0:33:16", "throughput": 5525.26, "total_tokens": 7620208} +{"current_steps": 15485, "total_steps": 37885, "loss": 0.0001, "lr": 1.4734675610741767e-06, "epoch": 2.0436848356869475, "percentage": 40.87, "elapsed_time": "0:22:59", "remaining_time": "0:33:15", "throughput": 5525.87, "total_tokens": 7622896} +{"current_steps": 15490, "total_steps": 37885, "loss": 0.0506, "lr": 1.4730617227494577e-06, "epoch": 2.0443447274646958, "percentage": 40.89, "elapsed_time": "0:22:59", "remaining_time": "0:33:14", "throughput": 5526.31, "total_tokens": 7625328} +{"current_steps": 15495, "total_steps": 37885, "loss": 0.0549, "lr": 1.4726557840208137e-06, "epoch": 2.045004619242444, "percentage": 40.9, "elapsed_time": "0:23:00", "remaining_time": "0:33:14", "throughput": 5527.02, "total_tokens": 7628208} +{"current_steps": 15500, "total_steps": 37885, "loss": 0.0008, "lr": 1.4722497449744022e-06, "epoch": 2.0456645110201928, "percentage": 40.91, "elapsed_time": "0:23:00", "remaining_time": "0:33:13", "throughput": 5527.46, "total_tokens": 7630640} +{"current_steps": 15505, "total_steps": 37885, "loss": 0.0009, "lr": 1.471843605696402e-06, "epoch": 2.046324402797941, "percentage": 40.93, "elapsed_time": "0:23:00", "remaining_time": "0:33:13", "throughput": 5527.89, "total_tokens": 7633072} +{"current_steps": 15510, "total_steps": 37885, "loss": 0.0002, "lr": 1.4714373662730136e-06, "epoch": 2.0469842945756898, "percentage": 40.94, "elapsed_time": "0:23:01", "remaining_time": "0:33:12", "throughput": 5528.36, "total_tokens": 7635568} +{"current_steps": 15515, "total_steps": 37885, "loss": 0.0002, "lr": 1.4710310267904578e-06, "epoch": 2.047644186353438, "percentage": 40.95, "elapsed_time": "0:23:01", "remaining_time": "0:33:11", "throughput": 5529.01, "total_tokens": 7638320} +{"current_steps": 15520, "total_steps": 37885, "loss": 0.0627, "lr": 1.4706245873349777e-06, "epoch": 2.0483040781311863, "percentage": 40.97, "elapsed_time": "0:23:01", "remaining_time": "0:33:11", "throughput": 5529.25, "total_tokens": 7640496} +{"current_steps": 15525, "total_steps": 37885, "loss": 0.0002, "lr": 1.4702180479928368e-06, "epoch": 2.048963969908935, "percentage": 40.98, "elapsed_time": "0:23:02", "remaining_time": "0:33:10", "throughput": 5529.53, "total_tokens": 7642736} +{"current_steps": 15530, "total_steps": 37885, "loss": 0.0519, "lr": 1.4698114088503203e-06, "epoch": 2.0496238616866833, "percentage": 40.99, "elapsed_time": "0:23:02", "remaining_time": "0:33:10", "throughput": 5529.91, "total_tokens": 7645104} +{"current_steps": 15535, "total_steps": 37885, "loss": 0.0002, "lr": 1.4694046699937341e-06, "epoch": 2.050283753464432, "percentage": 41.01, "elapsed_time": "0:23:02", "remaining_time": "0:33:09", "throughput": 5530.28, "total_tokens": 7647472} +{"current_steps": 15540, "total_steps": 37885, "loss": 0.0015, "lr": 1.4689978315094066e-06, "epoch": 2.0509436452421803, "percentage": 41.02, "elapsed_time": "0:23:03", "remaining_time": "0:33:08", "throughput": 5530.75, "total_tokens": 7649968} +{"current_steps": 15545, "total_steps": 37885, "loss": 0.0712, "lr": 1.468590893483685e-06, "epoch": 2.0516035370199286, "percentage": 41.03, "elapsed_time": "0:23:03", "remaining_time": "0:33:08", "throughput": 5531.29, "total_tokens": 7652592} +{"current_steps": 15550, "total_steps": 37885, "loss": 0.0006, "lr": 1.4681838560029395e-06, "epoch": 2.0522634287976773, "percentage": 41.05, "elapsed_time": "0:23:03", "remaining_time": "0:33:07", "throughput": 5531.84, "total_tokens": 7655216} +{"current_steps": 15555, "total_steps": 37885, "loss": 0.0006, "lr": 1.467776719153561e-06, "epoch": 2.0529233205754256, "percentage": 41.06, "elapsed_time": "0:23:04", "remaining_time": "0:33:07", "throughput": 5532.39, "total_tokens": 7657840} +{"current_steps": 15560, "total_steps": 37885, "loss": 0.0458, "lr": 1.4673694830219613e-06, "epoch": 2.053583212353174, "percentage": 41.07, "elapsed_time": "0:23:04", "remaining_time": "0:33:06", "throughput": 5532.71, "total_tokens": 7660144} +{"current_steps": 15565, "total_steps": 37885, "loss": 0.0001, "lr": 1.466962147694573e-06, "epoch": 2.0542431041309226, "percentage": 41.08, "elapsed_time": "0:23:04", "remaining_time": "0:33:05", "throughput": 5533.22, "total_tokens": 7662704} +{"current_steps": 15570, "total_steps": 37885, "loss": 0.093, "lr": 1.4665547132578508e-06, "epoch": 2.054902995908671, "percentage": 41.1, "elapsed_time": "0:23:05", "remaining_time": "0:33:05", "throughput": 5533.7, "total_tokens": 7665200} +{"current_steps": 15575, "total_steps": 37885, "loss": 0.0002, "lr": 1.466147179798269e-06, "epoch": 2.0555628876864196, "percentage": 41.11, "elapsed_time": "0:23:05", "remaining_time": "0:33:04", "throughput": 5533.94, "total_tokens": 7667376} +{"current_steps": 15580, "total_steps": 37885, "loss": 0.0003, "lr": 1.4657395474023237e-06, "epoch": 2.056222779464168, "percentage": 41.12, "elapsed_time": "0:23:05", "remaining_time": "0:33:04", "throughput": 5534.25, "total_tokens": 7669680} +{"current_steps": 15585, "total_steps": 37885, "loss": 0.0004, "lr": 1.4653318161565325e-06, "epoch": 2.056882671241916, "percentage": 41.14, "elapsed_time": "0:23:06", "remaining_time": "0:33:03", "throughput": 5534.72, "total_tokens": 7672176} +{"current_steps": 15590, "total_steps": 37885, "loss": 0.0523, "lr": 1.4649239861474324e-06, "epoch": 2.057542563019665, "percentage": 41.15, "elapsed_time": "0:23:06", "remaining_time": "0:33:02", "throughput": 5535.27, "total_tokens": 7674800} +{"current_steps": 15595, "total_steps": 37885, "loss": 0.1052, "lr": 1.4645160574615834e-06, "epoch": 2.058202454797413, "percentage": 41.16, "elapsed_time": "0:23:06", "remaining_time": "0:33:02", "throughput": 5535.7, "total_tokens": 7677232} +{"current_steps": 15600, "total_steps": 37885, "loss": 0.0003, "lr": 1.4641080301855648e-06, "epoch": 2.058862346575162, "percentage": 41.18, "elapsed_time": "0:23:07", "remaining_time": "0:33:01", "throughput": 5536.02, "total_tokens": 7679536} +{"current_steps": 15605, "total_steps": 37885, "loss": 0.0002, "lr": 1.4636999044059777e-06, "epoch": 2.05952223835291, "percentage": 41.19, "elapsed_time": "0:23:07", "remaining_time": "0:33:01", "throughput": 5536.37, "total_tokens": 7681840} +{"current_steps": 15610, "total_steps": 37885, "loss": 0.1445, "lr": 1.4632916802094436e-06, "epoch": 2.0601821301306584, "percentage": 41.2, "elapsed_time": "0:23:07", "remaining_time": "0:33:00", "throughput": 5536.94, "total_tokens": 7684528} +{"current_steps": 15615, "total_steps": 37885, "loss": 0.0006, "lr": 1.462883357682605e-06, "epoch": 2.060842021908407, "percentage": 41.22, "elapsed_time": "0:23:08", "remaining_time": "0:32:59", "throughput": 5537.49, "total_tokens": 7687152} +{"current_steps": 15620, "total_steps": 37885, "loss": 0.1174, "lr": 1.4624749369121265e-06, "epoch": 2.0615019136861554, "percentage": 41.23, "elapsed_time": "0:23:08", "remaining_time": "0:32:59", "throughput": 5537.99, "total_tokens": 7689712} +{"current_steps": 15625, "total_steps": 37885, "loss": 0.0001, "lr": 1.4620664179846908e-06, "epoch": 2.0621618054639037, "percentage": 41.24, "elapsed_time": "0:23:08", "remaining_time": "0:32:58", "throughput": 5538.38, "total_tokens": 7692144} +{"current_steps": 15630, "total_steps": 37885, "loss": 0.0006, "lr": 1.4616578009870044e-06, "epoch": 2.0628216972416524, "percentage": 41.26, "elapsed_time": "0:23:09", "remaining_time": "0:32:58", "throughput": 5538.62, "total_tokens": 7694320} +{"current_steps": 15635, "total_steps": 37885, "loss": 0.0008, "lr": 1.4612490860057927e-06, "epoch": 2.0634815890194007, "percentage": 41.27, "elapsed_time": "0:23:09", "remaining_time": "0:32:57", "throughput": 5539.06, "total_tokens": 7696752} +{"current_steps": 15640, "total_steps": 37885, "loss": 0.0002, "lr": 1.4608402731278022e-06, "epoch": 2.0641414807971494, "percentage": 41.28, "elapsed_time": "0:23:09", "remaining_time": "0:32:56", "throughput": 5539.48, "total_tokens": 7699184} +{"current_steps": 15645, "total_steps": 37885, "loss": 0.1332, "lr": 1.4604313624398014e-06, "epoch": 2.0648013725748977, "percentage": 41.3, "elapsed_time": "0:23:10", "remaining_time": "0:32:56", "throughput": 5539.77, "total_tokens": 7701424} +{"current_steps": 15650, "total_steps": 37885, "loss": 0.0006, "lr": 1.4600223540285778e-06, "epoch": 2.065461264352646, "percentage": 41.31, "elapsed_time": "0:23:10", "remaining_time": "0:32:55", "throughput": 5540.19, "total_tokens": 7703856} +{"current_steps": 15655, "total_steps": 37885, "loss": 0.0737, "lr": 1.459613247980941e-06, "epoch": 2.0661211561303947, "percentage": 41.32, "elapsed_time": "0:23:10", "remaining_time": "0:32:55", "throughput": 5540.58, "total_tokens": 7706224} +{"current_steps": 15660, "total_steps": 37885, "loss": 0.0006, "lr": 1.4592040443837203e-06, "epoch": 2.066781047908143, "percentage": 41.34, "elapsed_time": "0:23:11", "remaining_time": "0:32:54", "throughput": 5540.84, "total_tokens": 7708400} +{"current_steps": 15665, "total_steps": 37885, "loss": 0.0015, "lr": 1.458794743323767e-06, "epoch": 2.0674409396858917, "percentage": 41.35, "elapsed_time": "0:23:11", "remaining_time": "0:32:53", "throughput": 5541.34, "total_tokens": 7710960} +{"current_steps": 15670, "total_steps": 37885, "loss": 0.0003, "lr": 1.4583853448879513e-06, "epoch": 2.06810083146364, "percentage": 41.36, "elapsed_time": "0:23:11", "remaining_time": "0:32:53", "throughput": 5541.83, "total_tokens": 7713520} +{"current_steps": 15675, "total_steps": 37885, "loss": 0.0001, "lr": 1.4579758491631655e-06, "epoch": 2.0687607232413883, "percentage": 41.38, "elapsed_time": "0:23:12", "remaining_time": "0:32:52", "throughput": 5542.14, "total_tokens": 7715824} +{"current_steps": 15680, "total_steps": 37885, "loss": 0.0443, "lr": 1.4575662562363222e-06, "epoch": 2.069420615019137, "percentage": 41.39, "elapsed_time": "0:23:12", "remaining_time": "0:32:52", "throughput": 5542.67, "total_tokens": 7718448} +{"current_steps": 15685, "total_steps": 37885, "loss": 0.0005, "lr": 1.4571565661943542e-06, "epoch": 2.0700805067968853, "percentage": 41.4, "elapsed_time": "0:23:12", "remaining_time": "0:32:51", "throughput": 5543.13, "total_tokens": 7720944} +{"current_steps": 15690, "total_steps": 37885, "loss": 0.0007, "lr": 1.456746779124216e-06, "epoch": 2.0707403985746335, "percentage": 41.41, "elapsed_time": "0:23:13", "remaining_time": "0:32:50", "throughput": 5543.79, "total_tokens": 7723760} +{"current_steps": 15695, "total_steps": 37885, "loss": 0.0004, "lr": 1.4563368951128812e-06, "epoch": 2.0714002903523823, "percentage": 41.43, "elapsed_time": "0:23:13", "remaining_time": "0:32:50", "throughput": 5544.24, "total_tokens": 7726256} +{"current_steps": 15700, "total_steps": 37885, "loss": 0.1174, "lr": 1.4559269142473452e-06, "epoch": 2.0720601821301305, "percentage": 41.44, "elapsed_time": "0:23:13", "remaining_time": "0:32:49", "throughput": 5544.96, "total_tokens": 7729136} +{"current_steps": 15705, "total_steps": 37885, "loss": 0.0002, "lr": 1.455516836614623e-06, "epoch": 2.0727200739078793, "percentage": 41.45, "elapsed_time": "0:23:14", "remaining_time": "0:32:49", "throughput": 5545.54, "total_tokens": 7731824} +{"current_steps": 15710, "total_steps": 37885, "loss": 0.1329, "lr": 1.4551066623017505e-06, "epoch": 2.0733799656856275, "percentage": 41.47, "elapsed_time": "0:23:14", "remaining_time": "0:32:48", "throughput": 5545.87, "total_tokens": 7734128} +{"current_steps": 15715, "total_steps": 37885, "loss": 0.072, "lr": 1.4546963913957848e-06, "epoch": 2.074039857463376, "percentage": 41.48, "elapsed_time": "0:23:14", "remaining_time": "0:32:47", "throughput": 5545.97, "total_tokens": 7736112} +{"current_steps": 15720, "total_steps": 37885, "loss": 0.0002, "lr": 1.4542860239838025e-06, "epoch": 2.0746997492411245, "percentage": 41.49, "elapsed_time": "0:23:15", "remaining_time": "0:32:47", "throughput": 5546.52, "total_tokens": 7738736} +{"current_steps": 15725, "total_steps": 37885, "loss": 0.0002, "lr": 1.4538755601529018e-06, "epoch": 2.075359641018873, "percentage": 41.51, "elapsed_time": "0:23:15", "remaining_time": "0:32:46", "throughput": 5547.09, "total_tokens": 7741424} +{"current_steps": 15730, "total_steps": 37885, "loss": 0.0003, "lr": 1.4534649999901999e-06, "epoch": 2.0760195327966215, "percentage": 41.52, "elapsed_time": "0:23:15", "remaining_time": "0:32:46", "throughput": 5547.35, "total_tokens": 7743664} +{"current_steps": 15735, "total_steps": 37885, "loss": 0.0002, "lr": 1.4530543435828355e-06, "epoch": 2.07667942457437, "percentage": 41.53, "elapsed_time": "0:23:16", "remaining_time": "0:32:45", "throughput": 5547.71, "total_tokens": 7746032} +{"current_steps": 15740, "total_steps": 37885, "loss": 0.0567, "lr": 1.4526435910179674e-06, "epoch": 2.077339316352118, "percentage": 41.55, "elapsed_time": "0:23:16", "remaining_time": "0:32:44", "throughput": 5548.02, "total_tokens": 7748336} +{"current_steps": 15745, "total_steps": 37885, "loss": 0.1586, "lr": 1.4522327423827746e-06, "epoch": 2.077999208129867, "percentage": 41.56, "elapsed_time": "0:23:16", "remaining_time": "0:32:44", "throughput": 5548.56, "total_tokens": 7750960} +{"current_steps": 15750, "total_steps": 37885, "loss": 0.0005, "lr": 1.4518217977644576e-06, "epoch": 2.078659099907615, "percentage": 41.57, "elapsed_time": "0:23:17", "remaining_time": "0:32:43", "throughput": 5549.21, "total_tokens": 7753776} +{"current_steps": 15755, "total_steps": 37885, "loss": 0.0007, "lr": 1.4514107572502355e-06, "epoch": 2.079318991685364, "percentage": 41.59, "elapsed_time": "0:23:17", "remaining_time": "0:32:43", "throughput": 5549.73, "total_tokens": 7756400} +{"current_steps": 15760, "total_steps": 37885, "loss": 0.0781, "lr": 1.450999620927349e-06, "epoch": 2.079978883463112, "percentage": 41.6, "elapsed_time": "0:23:17", "remaining_time": "0:32:42", "throughput": 5550.02, "total_tokens": 7758640} +{"current_steps": 15765, "total_steps": 37885, "loss": 0.0003, "lr": 1.4505883888830591e-06, "epoch": 2.0806387752408604, "percentage": 41.61, "elapsed_time": "0:23:18", "remaining_time": "0:32:41", "throughput": 5550.43, "total_tokens": 7761072} +{"current_steps": 15770, "total_steps": 37885, "loss": 0.001, "lr": 1.4501770612046461e-06, "epoch": 2.081298667018609, "percentage": 41.63, "elapsed_time": "0:23:18", "remaining_time": "0:32:41", "throughput": 5550.97, "total_tokens": 7763696} +{"current_steps": 15775, "total_steps": 37885, "loss": 0.0007, "lr": 1.4497656379794126e-06, "epoch": 2.0819585587963574, "percentage": 41.64, "elapsed_time": "0:23:18", "remaining_time": "0:32:40", "throughput": 5551.42, "total_tokens": 7766192} +{"current_steps": 15780, "total_steps": 37885, "loss": 0.0007, "lr": 1.4493541192946785e-06, "epoch": 2.0826184505741057, "percentage": 41.65, "elapsed_time": "0:23:19", "remaining_time": "0:32:40", "throughput": 5551.92, "total_tokens": 7768752} +{"current_steps": 15785, "total_steps": 37885, "loss": 0.0007, "lr": 1.448942505237787e-06, "epoch": 2.0832783423518544, "percentage": 41.67, "elapsed_time": "0:23:19", "remaining_time": "0:32:39", "throughput": 5552.36, "total_tokens": 7771248} +{"current_steps": 15790, "total_steps": 37885, "loss": 0.0, "lr": 1.4485307958960996e-06, "epoch": 2.0839382341296027, "percentage": 41.68, "elapsed_time": "0:23:19", "remaining_time": "0:32:38", "throughput": 5553.01, "total_tokens": 7774064} +{"current_steps": 15795, "total_steps": 37885, "loss": 0.0001, "lr": 1.448118991356999e-06, "epoch": 2.0845981259073514, "percentage": 41.69, "elapsed_time": "0:23:20", "remaining_time": "0:32:38", "throughput": 5553.43, "total_tokens": 7776496} +{"current_steps": 15800, "total_steps": 37885, "loss": 0.1766, "lr": 1.4477070917078876e-06, "epoch": 2.0852580176850997, "percentage": 41.71, "elapsed_time": "0:23:20", "remaining_time": "0:32:37", "throughput": 5553.77, "total_tokens": 7778800} +{"current_steps": 15805, "total_steps": 37885, "loss": 0.0004, "lr": 1.4472950970361878e-06, "epoch": 2.085917909462848, "percentage": 41.72, "elapsed_time": "0:23:20", "remaining_time": "0:32:37", "throughput": 5554.16, "total_tokens": 7781168} +{"current_steps": 15810, "total_steps": 37885, "loss": 0.0002, "lr": 1.4468830074293425e-06, "epoch": 2.0865778012405967, "percentage": 41.73, "elapsed_time": "0:23:21", "remaining_time": "0:32:36", "throughput": 5554.72, "total_tokens": 7783792} +{"current_steps": 15815, "total_steps": 37885, "loss": 0.0104, "lr": 1.4464708229748154e-06, "epoch": 2.087237693018345, "percentage": 41.74, "elapsed_time": "0:23:21", "remaining_time": "0:32:35", "throughput": 5555.2, "total_tokens": 7786288} +{"current_steps": 15820, "total_steps": 37885, "loss": 0.0001, "lr": 1.4460585437600887e-06, "epoch": 2.087897584796093, "percentage": 41.76, "elapsed_time": "0:23:21", "remaining_time": "0:32:35", "throughput": 5555.71, "total_tokens": 7788848} +{"current_steps": 15825, "total_steps": 37885, "loss": 0.0001, "lr": 1.4456461698726666e-06, "epoch": 2.088557476573842, "percentage": 41.77, "elapsed_time": "0:23:22", "remaining_time": "0:32:34", "throughput": 5556.27, "total_tokens": 7791472} +{"current_steps": 15830, "total_steps": 37885, "loss": 0.0002, "lr": 1.445233701400072e-06, "epoch": 2.08921736835159, "percentage": 41.78, "elapsed_time": "0:23:22", "remaining_time": "0:32:34", "throughput": 5556.83, "total_tokens": 7794096} +{"current_steps": 15835, "total_steps": 37885, "loss": 0.0923, "lr": 1.4448211384298482e-06, "epoch": 2.089877260129339, "percentage": 41.8, "elapsed_time": "0:23:22", "remaining_time": "0:32:33", "throughput": 5557.23, "total_tokens": 7796464} +{"current_steps": 15840, "total_steps": 37885, "loss": 0.0005, "lr": 1.4444084810495589e-06, "epoch": 2.090537151907087, "percentage": 41.81, "elapsed_time": "0:23:23", "remaining_time": "0:32:32", "throughput": 5557.61, "total_tokens": 7798832} +{"current_steps": 15845, "total_steps": 37885, "loss": 0.2503, "lr": 1.4439957293467877e-06, "epoch": 2.0911970436848355, "percentage": 41.82, "elapsed_time": "0:23:23", "remaining_time": "0:32:32", "throughput": 5558.17, "total_tokens": 7801456} +{"current_steps": 15850, "total_steps": 37885, "loss": 0.0013, "lr": 1.4435828834091384e-06, "epoch": 2.091856935462584, "percentage": 41.84, "elapsed_time": "0:23:23", "remaining_time": "0:32:31", "throughput": 5558.68, "total_tokens": 7804016} +{"current_steps": 15855, "total_steps": 37885, "loss": 0.0461, "lr": 1.443169943324234e-06, "epoch": 2.0925168272403325, "percentage": 41.85, "elapsed_time": "0:23:24", "remaining_time": "0:32:31", "throughput": 5559.07, "total_tokens": 7806384} +{"current_steps": 15860, "total_steps": 37885, "loss": 0.0001, "lr": 1.4427569091797182e-06, "epoch": 2.0931767190180812, "percentage": 41.86, "elapsed_time": "0:23:24", "remaining_time": "0:32:30", "throughput": 5559.29, "total_tokens": 7808496} +{"current_steps": 15865, "total_steps": 37885, "loss": 0.0003, "lr": 1.442343781063255e-06, "epoch": 2.0938366107958295, "percentage": 41.88, "elapsed_time": "0:23:24", "remaining_time": "0:32:29", "throughput": 5559.76, "total_tokens": 7810992} +{"current_steps": 15870, "total_steps": 37885, "loss": 0.0001, "lr": 1.441930559062527e-06, "epoch": 2.094496502573578, "percentage": 41.89, "elapsed_time": "0:23:25", "remaining_time": "0:32:29", "throughput": 5560.35, "total_tokens": 7813680} +{"current_steps": 15875, "total_steps": 37885, "loss": 0.0673, "lr": 1.4415172432652385e-06, "epoch": 2.0951563943513265, "percentage": 41.9, "elapsed_time": "0:23:25", "remaining_time": "0:32:28", "throughput": 5560.91, "total_tokens": 7816304} +{"current_steps": 15880, "total_steps": 37885, "loss": 0.0001, "lr": 1.441103833759112e-06, "epoch": 2.095816286129075, "percentage": 41.92, "elapsed_time": "0:23:25", "remaining_time": "0:32:28", "throughput": 5561.43, "total_tokens": 7818864} +{"current_steps": 15885, "total_steps": 37885, "loss": 0.0004, "lr": 1.4406903306318913e-06, "epoch": 2.0964761779068235, "percentage": 41.93, "elapsed_time": "0:23:26", "remaining_time": "0:32:27", "throughput": 5561.76, "total_tokens": 7821168} +{"current_steps": 15890, "total_steps": 37885, "loss": 0.0002, "lr": 1.440276733971339e-06, "epoch": 2.097136069684572, "percentage": 41.94, "elapsed_time": "0:23:26", "remaining_time": "0:32:26", "throughput": 5562.4, "total_tokens": 7823920} +{"current_steps": 15895, "total_steps": 37885, "loss": 0.0001, "lr": 1.439863043865238e-06, "epoch": 2.09779596146232, "percentage": 41.96, "elapsed_time": "0:23:26", "remaining_time": "0:32:26", "throughput": 5562.74, "total_tokens": 7826224} +{"current_steps": 15900, "total_steps": 37885, "loss": 0.0, "lr": 1.4394492604013914e-06, "epoch": 2.098455853240069, "percentage": 41.97, "elapsed_time": "0:23:27", "remaining_time": "0:32:25", "throughput": 5563.22, "total_tokens": 7828720} +{"current_steps": 15905, "total_steps": 37885, "loss": 0.0554, "lr": 1.4390353836676217e-06, "epoch": 2.099115745017817, "percentage": 41.98, "elapsed_time": "0:23:27", "remaining_time": "0:32:25", "throughput": 5563.76, "total_tokens": 7831344} +{"current_steps": 15910, "total_steps": 37885, "loss": 0.0002, "lr": 1.4386214137517707e-06, "epoch": 2.0997756367955653, "percentage": 42.0, "elapsed_time": "0:23:27", "remaining_time": "0:32:24", "throughput": 5564.24, "total_tokens": 7833840} +{"current_steps": 15915, "total_steps": 37885, "loss": 0.0659, "lr": 1.438207350741701e-06, "epoch": 2.100435528573314, "percentage": 42.01, "elapsed_time": "0:23:28", "remaining_time": "0:32:23", "throughput": 5564.5, "total_tokens": 7836016} +{"current_steps": 15920, "total_steps": 37885, "loss": 0.0295, "lr": 1.4377931947252943e-06, "epoch": 2.1010954203510623, "percentage": 42.02, "elapsed_time": "0:23:28", "remaining_time": "0:32:23", "throughput": 5565.13, "total_tokens": 7838768} +{"current_steps": 15925, "total_steps": 37885, "loss": 0.0581, "lr": 1.4373789457904522e-06, "epoch": 2.101755312128811, "percentage": 42.04, "elapsed_time": "0:23:28", "remaining_time": "0:32:22", "throughput": 5565.65, "total_tokens": 7841328} +{"current_steps": 15930, "total_steps": 37885, "loss": 0.0002, "lr": 1.4369646040250962e-06, "epoch": 2.1024152039065593, "percentage": 42.05, "elapsed_time": "0:23:29", "remaining_time": "0:32:22", "throughput": 5566.09, "total_tokens": 7843760} +{"current_steps": 15935, "total_steps": 37885, "loss": 0.0908, "lr": 1.4365501695171673e-06, "epoch": 2.1030750956843076, "percentage": 42.06, "elapsed_time": "0:23:29", "remaining_time": "0:32:21", "throughput": 5566.72, "total_tokens": 7846512} +{"current_steps": 15940, "total_steps": 37885, "loss": 0.0002, "lr": 1.436135642354626e-06, "epoch": 2.1037349874620563, "percentage": 42.07, "elapsed_time": "0:23:29", "remaining_time": "0:32:21", "throughput": 5567.23, "total_tokens": 7849072} +{"current_steps": 15945, "total_steps": 37885, "loss": 0.088, "lr": 1.4357210226254533e-06, "epoch": 2.1043948792398046, "percentage": 42.09, "elapsed_time": "0:23:30", "remaining_time": "0:32:20", "throughput": 5567.75, "total_tokens": 7851632} +{"current_steps": 15950, "total_steps": 37885, "loss": 0.0004, "lr": 1.435306310417648e-06, "epoch": 2.105054771017553, "percentage": 42.1, "elapsed_time": "0:23:30", "remaining_time": "0:32:19", "throughput": 5568.17, "total_tokens": 7854064} +{"current_steps": 15955, "total_steps": 37885, "loss": 0.0478, "lr": 1.4348915058192316e-06, "epoch": 2.1057146627953016, "percentage": 42.11, "elapsed_time": "0:23:30", "remaining_time": "0:32:19", "throughput": 5568.77, "total_tokens": 7856752} +{"current_steps": 15960, "total_steps": 37885, "loss": 0.0468, "lr": 1.4344766089182416e-06, "epoch": 2.10637455457305, "percentage": 42.13, "elapsed_time": "0:23:31", "remaining_time": "0:32:18", "throughput": 5569.32, "total_tokens": 7859376} +{"current_steps": 15965, "total_steps": 37885, "loss": 0.0001, "lr": 1.4340616198027377e-06, "epoch": 2.1070344463507986, "percentage": 42.14, "elapsed_time": "0:23:31", "remaining_time": "0:32:18", "throughput": 5569.85, "total_tokens": 7862000} +{"current_steps": 15970, "total_steps": 37885, "loss": 0.0612, "lr": 1.4336465385607982e-06, "epoch": 2.107694338128547, "percentage": 42.15, "elapsed_time": "0:23:31", "remaining_time": "0:32:17", "throughput": 5570.19, "total_tokens": 7864304} +{"current_steps": 15975, "total_steps": 37885, "loss": 0.1067, "lr": 1.433231365280521e-06, "epoch": 2.108354229906295, "percentage": 42.17, "elapsed_time": "0:23:32", "remaining_time": "0:32:16", "throughput": 5570.52, "total_tokens": 7866608} +{"current_steps": 15980, "total_steps": 37885, "loss": 0.0596, "lr": 1.432816100050024e-06, "epoch": 2.109014121684044, "percentage": 42.18, "elapsed_time": "0:23:32", "remaining_time": "0:32:16", "throughput": 5570.95, "total_tokens": 7869040} +{"current_steps": 15985, "total_steps": 37885, "loss": 0.0001, "lr": 1.432400742957444e-06, "epoch": 2.109674013461792, "percentage": 42.19, "elapsed_time": "0:23:32", "remaining_time": "0:32:15", "throughput": 5571.58, "total_tokens": 7871792} +{"current_steps": 15990, "total_steps": 37885, "loss": 0.001, "lr": 1.4319852940909377e-06, "epoch": 2.110333905239541, "percentage": 42.21, "elapsed_time": "0:23:33", "remaining_time": "0:32:15", "throughput": 5571.96, "total_tokens": 7874160} +{"current_steps": 15995, "total_steps": 37885, "loss": 0.0029, "lr": 1.4315697535386804e-06, "epoch": 2.110993797017289, "percentage": 42.22, "elapsed_time": "0:23:33", "remaining_time": "0:32:14", "throughput": 5572.43, "total_tokens": 7876656} +{"current_steps": 16000, "total_steps": 37885, "loss": 0.0551, "lr": 1.4311541213888682e-06, "epoch": 2.1116536887950375, "percentage": 42.23, "elapsed_time": "0:23:33", "remaining_time": "0:32:13", "throughput": 5572.99, "total_tokens": 7879280} +{"current_steps": 16005, "total_steps": 37885, "loss": 0.0002, "lr": 1.430738397729716e-06, "epoch": 2.112313580572786, "percentage": 42.25, "elapsed_time": "0:23:34", "remaining_time": "0:32:13", "throughput": 5573.42, "total_tokens": 7881712} +{"current_steps": 16010, "total_steps": 37885, "loss": 0.0004, "lr": 1.4303225826494583e-06, "epoch": 2.1129734723505345, "percentage": 42.26, "elapsed_time": "0:23:34", "remaining_time": "0:32:12", "throughput": 5573.71, "total_tokens": 7883952} +{"current_steps": 16015, "total_steps": 37885, "loss": 0.0488, "lr": 1.4299066762363484e-06, "epoch": 2.113633364128283, "percentage": 42.27, "elapsed_time": "0:23:34", "remaining_time": "0:32:12", "throughput": 5574.09, "total_tokens": 7886320} +{"current_steps": 16020, "total_steps": 37885, "loss": 0.0273, "lr": 1.4294906785786593e-06, "epoch": 2.1142932559060315, "percentage": 42.29, "elapsed_time": "0:23:35", "remaining_time": "0:32:11", "throughput": 5574.69, "total_tokens": 7889008} +{"current_steps": 16025, "total_steps": 37885, "loss": 0.0751, "lr": 1.429074589764684e-06, "epoch": 2.1149531476837797, "percentage": 42.3, "elapsed_time": "0:23:35", "remaining_time": "0:32:10", "throughput": 5575.07, "total_tokens": 7891376} +{"current_steps": 16030, "total_steps": 37885, "loss": 0.0536, "lr": 1.4286584098827343e-06, "epoch": 2.1156130394615285, "percentage": 42.31, "elapsed_time": "0:23:35", "remaining_time": "0:32:10", "throughput": 5575.38, "total_tokens": 7893616} +{"current_steps": 16035, "total_steps": 37885, "loss": 0.0007, "lr": 1.4282421390211411e-06, "epoch": 2.1162729312392767, "percentage": 42.33, "elapsed_time": "0:23:36", "remaining_time": "0:32:09", "throughput": 5575.76, "total_tokens": 7895984} +{"current_steps": 16040, "total_steps": 37885, "loss": 0.0001, "lr": 1.4278257772682548e-06, "epoch": 2.116932823017025, "percentage": 42.34, "elapsed_time": "0:23:36", "remaining_time": "0:32:09", "throughput": 5576.06, "total_tokens": 7898224} +{"current_steps": 16045, "total_steps": 37885, "loss": 0.0001, "lr": 1.4274093247124456e-06, "epoch": 2.1175927147947737, "percentage": 42.35, "elapsed_time": "0:23:36", "remaining_time": "0:32:08", "throughput": 5576.49, "total_tokens": 7900656} +{"current_steps": 16050, "total_steps": 37885, "loss": 0.0001, "lr": 1.4269927814421023e-06, "epoch": 2.118252606572522, "percentage": 42.37, "elapsed_time": "0:23:37", "remaining_time": "0:32:07", "throughput": 5576.95, "total_tokens": 7903152} +{"current_steps": 16055, "total_steps": 37885, "loss": 0.0001, "lr": 1.426576147545633e-06, "epoch": 2.1189124983502707, "percentage": 42.38, "elapsed_time": "0:23:37", "remaining_time": "0:32:07", "throughput": 5577.33, "total_tokens": 7905520} +{"current_steps": 16060, "total_steps": 37885, "loss": 0.0009, "lr": 1.4261594231114658e-06, "epoch": 2.119572390128019, "percentage": 42.39, "elapsed_time": "0:23:37", "remaining_time": "0:32:06", "throughput": 5577.75, "total_tokens": 7907952} +{"current_steps": 16065, "total_steps": 37885, "loss": 0.1114, "lr": 1.4257426082280466e-06, "epoch": 2.1202322819057673, "percentage": 42.4, "elapsed_time": "0:23:38", "remaining_time": "0:32:06", "throughput": 5578.15, "total_tokens": 7910384} +{"current_steps": 16070, "total_steps": 37885, "loss": 0.009, "lr": 1.4253257029838419e-06, "epoch": 2.120892173683516, "percentage": 42.42, "elapsed_time": "0:23:38", "remaining_time": "0:32:05", "throughput": 5578.6, "total_tokens": 7912880} +{"current_steps": 16075, "total_steps": 37885, "loss": 0.1324, "lr": 1.4249087074673367e-06, "epoch": 2.1215520654612643, "percentage": 42.43, "elapsed_time": "0:23:38", "remaining_time": "0:32:04", "throughput": 5578.94, "total_tokens": 7915184} +{"current_steps": 16080, "total_steps": 37885, "loss": 0.0003, "lr": 1.4244916217670352e-06, "epoch": 2.122211957239013, "percentage": 42.44, "elapsed_time": "0:23:39", "remaining_time": "0:32:04", "throughput": 5579.24, "total_tokens": 7917424} +{"current_steps": 16085, "total_steps": 37885, "loss": 0.0015, "lr": 1.4240744459714612e-06, "epoch": 2.1228718490167613, "percentage": 42.46, "elapsed_time": "0:23:39", "remaining_time": "0:32:03", "throughput": 5579.49, "total_tokens": 7919600} +{"current_steps": 16090, "total_steps": 37885, "loss": 0.0001, "lr": 1.4236571801691568e-06, "epoch": 2.1235317407945096, "percentage": 42.47, "elapsed_time": "0:23:39", "remaining_time": "0:32:03", "throughput": 5580.05, "total_tokens": 7922224} +{"current_steps": 16095, "total_steps": 37885, "loss": 0.0682, "lr": 1.4232398244486835e-06, "epoch": 2.1241916325722583, "percentage": 42.48, "elapsed_time": "0:23:40", "remaining_time": "0:32:02", "throughput": 5580.35, "total_tokens": 7924464} +{"current_steps": 16100, "total_steps": 37885, "loss": 0.0001, "lr": 1.4228223788986226e-06, "epoch": 2.1248515243500066, "percentage": 42.5, "elapsed_time": "0:23:40", "remaining_time": "0:32:01", "throughput": 5580.9, "total_tokens": 7927088} +{"current_steps": 16105, "total_steps": 37885, "loss": 0.1128, "lr": 1.4224048436075738e-06, "epoch": 2.125511416127755, "percentage": 42.51, "elapsed_time": "0:23:40", "remaining_time": "0:32:01", "throughput": 5581.4, "total_tokens": 7929648} +{"current_steps": 16110, "total_steps": 37885, "loss": 0.0001, "lr": 1.4219872186641557e-06, "epoch": 2.1261713079055036, "percentage": 42.52, "elapsed_time": "0:23:41", "remaining_time": "0:32:00", "throughput": 5581.78, "total_tokens": 7932016} +{"current_steps": 16115, "total_steps": 37885, "loss": 0.0535, "lr": 1.421569504157006e-06, "epoch": 2.126831199683252, "percentage": 42.54, "elapsed_time": "0:23:41", "remaining_time": "0:32:00", "throughput": 5582.28, "total_tokens": 7934576} +{"current_steps": 16120, "total_steps": 37885, "loss": 0.0491, "lr": 1.4211517001747818e-06, "epoch": 2.1274910914610006, "percentage": 42.55, "elapsed_time": "0:23:41", "remaining_time": "0:31:59", "throughput": 5582.61, "total_tokens": 7936880} +{"current_steps": 16125, "total_steps": 37885, "loss": 0.0007, "lr": 1.420733806806159e-06, "epoch": 2.128150983238749, "percentage": 42.56, "elapsed_time": "0:23:42", "remaining_time": "0:31:58", "throughput": 5582.99, "total_tokens": 7939248} +{"current_steps": 16130, "total_steps": 37885, "loss": 0.1307, "lr": 1.4203158241398329e-06, "epoch": 2.128810875016497, "percentage": 42.58, "elapsed_time": "0:23:42", "remaining_time": "0:31:58", "throughput": 5583.58, "total_tokens": 7941936} +{"current_steps": 16135, "total_steps": 37885, "loss": 0.1063, "lr": 1.4198977522645162e-06, "epoch": 2.129470766794246, "percentage": 42.59, "elapsed_time": "0:23:42", "remaining_time": "0:31:57", "throughput": 5583.97, "total_tokens": 7944304} +{"current_steps": 16140, "total_steps": 37885, "loss": 0.1061, "lr": 1.4194795912689426e-06, "epoch": 2.130130658571994, "percentage": 42.6, "elapsed_time": "0:23:43", "remaining_time": "0:31:57", "throughput": 5584.27, "total_tokens": 7946544} +{"current_steps": 16145, "total_steps": 37885, "loss": 0.0001, "lr": 1.419061341241863e-06, "epoch": 2.130790550349743, "percentage": 42.62, "elapsed_time": "0:23:43", "remaining_time": "0:31:56", "throughput": 5584.97, "total_tokens": 7949424} +{"current_steps": 16150, "total_steps": 37885, "loss": 0.0001, "lr": 1.4186430022720488e-06, "epoch": 2.131450442127491, "percentage": 42.63, "elapsed_time": "0:23:43", "remaining_time": "0:31:56", "throughput": 5585.4, "total_tokens": 7951856} +{"current_steps": 16155, "total_steps": 37885, "loss": 0.0002, "lr": 1.4182245744482886e-06, "epoch": 2.1321103339052394, "percentage": 42.64, "elapsed_time": "0:23:44", "remaining_time": "0:31:55", "throughput": 5585.6, "total_tokens": 7953968} +{"current_steps": 16160, "total_steps": 37885, "loss": 0.1079, "lr": 1.4178060578593912e-06, "epoch": 2.132770225682988, "percentage": 42.66, "elapsed_time": "0:23:44", "remaining_time": "0:31:54", "throughput": 5586.06, "total_tokens": 7956464} +{"current_steps": 16165, "total_steps": 37885, "loss": 0.0001, "lr": 1.4173874525941836e-06, "epoch": 2.1334301174607364, "percentage": 42.67, "elapsed_time": "0:23:44", "remaining_time": "0:31:54", "throughput": 5586.47, "total_tokens": 7958896} +{"current_steps": 16170, "total_steps": 37885, "loss": 0.0126, "lr": 1.4169687587415114e-06, "epoch": 2.1340900092384847, "percentage": 42.68, "elapsed_time": "0:23:45", "remaining_time": "0:31:53", "throughput": 5586.89, "total_tokens": 7961328} +{"current_steps": 16175, "total_steps": 37885, "loss": 0.0691, "lr": 1.4165499763902399e-06, "epoch": 2.1347499010162334, "percentage": 42.69, "elapsed_time": "0:23:45", "remaining_time": "0:31:53", "throughput": 5587.4, "total_tokens": 7963888} +{"current_steps": 16180, "total_steps": 37885, "loss": 0.0001, "lr": 1.416131105629252e-06, "epoch": 2.1354097927939817, "percentage": 42.71, "elapsed_time": "0:23:45", "remaining_time": "0:31:52", "throughput": 5587.95, "total_tokens": 7966512} +{"current_steps": 16185, "total_steps": 37885, "loss": 0.0002, "lr": 1.4157121465474504e-06, "epoch": 2.1360696845717304, "percentage": 42.72, "elapsed_time": "0:23:45", "remaining_time": "0:31:51", "throughput": 5588.37, "total_tokens": 7968944} +{"current_steps": 16190, "total_steps": 37885, "loss": 0.0018, "lr": 1.4152930992337562e-06, "epoch": 2.1367295763494787, "percentage": 42.73, "elapsed_time": "0:23:46", "remaining_time": "0:31:51", "throughput": 5588.83, "total_tokens": 7971440} +{"current_steps": 16195, "total_steps": 37885, "loss": 0.0001, "lr": 1.4148739637771088e-06, "epoch": 2.137389468127227, "percentage": 42.75, "elapsed_time": "0:23:46", "remaining_time": "0:31:50", "throughput": 5589.16, "total_tokens": 7973744} +{"current_steps": 16200, "total_steps": 37885, "loss": 0.0523, "lr": 1.4144547402664674e-06, "epoch": 2.1380493599049757, "percentage": 42.76, "elapsed_time": "0:23:46", "remaining_time": "0:31:50", "throughput": 5589.39, "total_tokens": 7975920} +{"current_steps": 16205, "total_steps": 37885, "loss": 0.0004, "lr": 1.4140354287908079e-06, "epoch": 2.138709251682724, "percentage": 42.77, "elapsed_time": "0:23:47", "remaining_time": "0:31:49", "throughput": 5589.74, "total_tokens": 7978224} +{"current_steps": 16210, "total_steps": 37885, "loss": 0.0001, "lr": 1.4136160294391272e-06, "epoch": 2.1393691434604727, "percentage": 42.79, "elapsed_time": "0:23:47", "remaining_time": "0:31:48", "throughput": 5590.11, "total_tokens": 7980592} +{"current_steps": 16215, "total_steps": 37885, "loss": 0.0001, "lr": 1.4131965423004394e-06, "epoch": 2.140029035238221, "percentage": 42.8, "elapsed_time": "0:23:47", "remaining_time": "0:31:48", "throughput": 5590.44, "total_tokens": 7982896} +{"current_steps": 16220, "total_steps": 37885, "loss": 0.0297, "lr": 1.4127769674637777e-06, "epoch": 2.1406889270159692, "percentage": 42.81, "elapsed_time": "0:23:48", "remaining_time": "0:31:47", "throughput": 5590.69, "total_tokens": 7985072} +{"current_steps": 16225, "total_steps": 37885, "loss": 0.1126, "lr": 1.4123573050181937e-06, "epoch": 2.141348818793718, "percentage": 42.83, "elapsed_time": "0:23:48", "remaining_time": "0:31:47", "throughput": 5591.31, "total_tokens": 7987824} +{"current_steps": 16230, "total_steps": 37885, "loss": 0.0004, "lr": 1.4119375550527578e-06, "epoch": 2.1420087105714662, "percentage": 42.84, "elapsed_time": "0:23:48", "remaining_time": "0:31:46", "throughput": 5591.73, "total_tokens": 7990256} +{"current_steps": 16235, "total_steps": 37885, "loss": 0.0001, "lr": 1.4115177176565587e-06, "epoch": 2.1426686023492145, "percentage": 42.85, "elapsed_time": "0:23:49", "remaining_time": "0:31:45", "throughput": 5592.31, "total_tokens": 7992944} +{"current_steps": 16240, "total_steps": 37885, "loss": 0.0803, "lr": 1.4110977929187042e-06, "epoch": 2.1433284941269632, "percentage": 42.87, "elapsed_time": "0:23:49", "remaining_time": "0:31:45", "throughput": 5592.77, "total_tokens": 7995440} +{"current_steps": 16245, "total_steps": 37885, "loss": 0.0956, "lr": 1.41067778092832e-06, "epoch": 2.1439883859047115, "percentage": 42.88, "elapsed_time": "0:23:49", "remaining_time": "0:31:44", "throughput": 5593.14, "total_tokens": 7997808} +{"current_steps": 16250, "total_steps": 37885, "loss": 0.0798, "lr": 1.4102576817745506e-06, "epoch": 2.1446482776824602, "percentage": 42.89, "elapsed_time": "0:23:50", "remaining_time": "0:31:44", "throughput": 5593.6, "total_tokens": 8000304} +{"current_steps": 16255, "total_steps": 37885, "loss": 0.1273, "lr": 1.4098374955465592e-06, "epoch": 2.1453081694602085, "percentage": 42.91, "elapsed_time": "0:23:50", "remaining_time": "0:31:43", "throughput": 5594.18, "total_tokens": 8002992} +{"current_steps": 16260, "total_steps": 37885, "loss": 0.0005, "lr": 1.409417222333527e-06, "epoch": 2.145968061237957, "percentage": 42.92, "elapsed_time": "0:23:50", "remaining_time": "0:31:43", "throughput": 5594.63, "total_tokens": 8005488} +{"current_steps": 16265, "total_steps": 37885, "loss": 0.0005, "lr": 1.4089968622246543e-06, "epoch": 2.1466279530157055, "percentage": 42.93, "elapsed_time": "0:23:51", "remaining_time": "0:31:42", "throughput": 5595.26, "total_tokens": 8008240} +{"current_steps": 16270, "total_steps": 37885, "loss": 0.1245, "lr": 1.4085764153091595e-06, "epoch": 2.147287844793454, "percentage": 42.95, "elapsed_time": "0:23:51", "remaining_time": "0:31:41", "throughput": 5595.8, "total_tokens": 8010864} +{"current_steps": 16275, "total_steps": 37885, "loss": 0.0894, "lr": 1.4081558816762788e-06, "epoch": 2.1479477365712025, "percentage": 42.96, "elapsed_time": "0:23:51", "remaining_time": "0:31:41", "throughput": 5596.22, "total_tokens": 8013296} +{"current_steps": 16280, "total_steps": 37885, "loss": 0.0005, "lr": 1.4077352614152683e-06, "epoch": 2.148607628348951, "percentage": 42.97, "elapsed_time": "0:23:52", "remaining_time": "0:31:40", "throughput": 5596.84, "total_tokens": 8016048} +{"current_steps": 16285, "total_steps": 37885, "loss": 0.0012, "lr": 1.407314554615401e-06, "epoch": 2.149267520126699, "percentage": 42.99, "elapsed_time": "0:23:52", "remaining_time": "0:31:40", "throughput": 5597.25, "total_tokens": 8018480} +{"current_steps": 16290, "total_steps": 37885, "loss": 0.0413, "lr": 1.406893761365969e-06, "epoch": 2.149927411904448, "percentage": 43.0, "elapsed_time": "0:23:52", "remaining_time": "0:31:39", "throughput": 5597.75, "total_tokens": 8021040} +{"current_steps": 16295, "total_steps": 37885, "loss": 0.0005, "lr": 1.4064728817562825e-06, "epoch": 2.150587303682196, "percentage": 43.01, "elapsed_time": "0:23:53", "remaining_time": "0:31:38", "throughput": 5598.28, "total_tokens": 8023664} +{"current_steps": 16300, "total_steps": 37885, "loss": 0.0003, "lr": 1.4060519158756702e-06, "epoch": 2.1512471954599444, "percentage": 43.02, "elapsed_time": "0:23:53", "remaining_time": "0:31:38", "throughput": 5598.45, "total_tokens": 8025712} +{"current_steps": 16305, "total_steps": 37885, "loss": 0.0002, "lr": 1.4056308638134794e-06, "epoch": 2.151907087237693, "percentage": 43.04, "elapsed_time": "0:23:53", "remaining_time": "0:31:37", "throughput": 5598.91, "total_tokens": 8028208} +{"current_steps": 16310, "total_steps": 37885, "loss": 0.0002, "lr": 1.4052097256590752e-06, "epoch": 2.1525669790154414, "percentage": 43.05, "elapsed_time": "0:23:54", "remaining_time": "0:31:37", "throughput": 5599.31, "total_tokens": 8030640} +{"current_steps": 16315, "total_steps": 37885, "loss": 0.0613, "lr": 1.4047885015018407e-06, "epoch": 2.15322687079319, "percentage": 43.06, "elapsed_time": "0:23:54", "remaining_time": "0:31:36", "throughput": 5599.77, "total_tokens": 8033136} +{"current_steps": 16320, "total_steps": 37885, "loss": 0.054, "lr": 1.4043671914311785e-06, "epoch": 2.1538867625709384, "percentage": 43.08, "elapsed_time": "0:23:54", "remaining_time": "0:31:36", "throughput": 5600.27, "total_tokens": 8035696} +{"current_steps": 16325, "total_steps": 37885, "loss": 0.0348, "lr": 1.4039457955365077e-06, "epoch": 2.1545466543486866, "percentage": 43.09, "elapsed_time": "0:23:55", "remaining_time": "0:31:35", "throughput": 5600.89, "total_tokens": 8038448} +{"current_steps": 16330, "total_steps": 37885, "loss": 0.1972, "lr": 1.403524313907267e-06, "epoch": 2.1552065461264354, "percentage": 43.1, "elapsed_time": "0:23:55", "remaining_time": "0:31:34", "throughput": 5601.34, "total_tokens": 8040944} +{"current_steps": 16335, "total_steps": 37885, "loss": 0.0004, "lr": 1.403102746632913e-06, "epoch": 2.1558664379041836, "percentage": 43.12, "elapsed_time": "0:23:55", "remaining_time": "0:31:34", "throughput": 5601.71, "total_tokens": 8043312} +{"current_steps": 16340, "total_steps": 37885, "loss": 0.0002, "lr": 1.4026810938029197e-06, "epoch": 2.1565263296819324, "percentage": 43.13, "elapsed_time": "0:23:56", "remaining_time": "0:31:33", "throughput": 5602.21, "total_tokens": 8045872} +{"current_steps": 16345, "total_steps": 37885, "loss": 0.0644, "lr": 1.4022593555067804e-06, "epoch": 2.1571862214596806, "percentage": 43.14, "elapsed_time": "0:23:56", "remaining_time": "0:31:33", "throughput": 5602.83, "total_tokens": 8048624} +{"current_steps": 16350, "total_steps": 37885, "loss": 0.1225, "lr": 1.401837531834006e-06, "epoch": 2.157846113237429, "percentage": 43.16, "elapsed_time": "0:23:56", "remaining_time": "0:31:32", "throughput": 5603.1, "total_tokens": 8050864} +{"current_steps": 16355, "total_steps": 37885, "loss": 0.0007, "lr": 1.401415622874125e-06, "epoch": 2.1585060050151776, "percentage": 43.17, "elapsed_time": "0:23:57", "remaining_time": "0:31:31", "throughput": 5603.72, "total_tokens": 8053616} +{"current_steps": 16360, "total_steps": 37885, "loss": 0.0002, "lr": 1.400993628716685e-06, "epoch": 2.159165896792926, "percentage": 43.18, "elapsed_time": "0:23:57", "remaining_time": "0:31:31", "throughput": 5604.1, "total_tokens": 8056048} +{"current_steps": 16365, "total_steps": 37885, "loss": 0.0399, "lr": 1.400571549451251e-06, "epoch": 2.159825788570674, "percentage": 43.2, "elapsed_time": "0:23:57", "remaining_time": "0:31:30", "throughput": 5604.4, "total_tokens": 8058288} +{"current_steps": 16370, "total_steps": 37885, "loss": 0.0002, "lr": 1.4001493851674066e-06, "epoch": 2.160485680348423, "percentage": 43.21, "elapsed_time": "0:23:58", "remaining_time": "0:31:30", "throughput": 5604.73, "total_tokens": 8060592} +{"current_steps": 16375, "total_steps": 37885, "loss": 0.0229, "lr": 1.3997271359547529e-06, "epoch": 2.161145572126171, "percentage": 43.22, "elapsed_time": "0:23:58", "remaining_time": "0:31:29", "throughput": 5605.1, "total_tokens": 8062960} +{"current_steps": 16380, "total_steps": 37885, "loss": 0.0017, "lr": 1.3993048019029088e-06, "epoch": 2.16180546390392, "percentage": 43.24, "elapsed_time": "0:23:58", "remaining_time": "0:31:29", "throughput": 5605.63, "total_tokens": 8065584} +{"current_steps": 16385, "total_steps": 37885, "loss": 0.0704, "lr": 1.3988823831015125e-06, "epoch": 2.162465355681668, "percentage": 43.25, "elapsed_time": "0:23:59", "remaining_time": "0:31:28", "throughput": 5606.12, "total_tokens": 8068144} +{"current_steps": 16390, "total_steps": 37885, "loss": 0.0004, "lr": 1.3984598796402183e-06, "epoch": 2.1631252474594165, "percentage": 43.26, "elapsed_time": "0:23:59", "remaining_time": "0:31:27", "throughput": 5606.41, "total_tokens": 8070384} +{"current_steps": 16395, "total_steps": 37885, "loss": 0.0002, "lr": 1.3980372916087006e-06, "epoch": 2.163785139237165, "percentage": 43.28, "elapsed_time": "0:23:59", "remaining_time": "0:31:27", "throughput": 5606.83, "total_tokens": 8072816} +{"current_steps": 16400, "total_steps": 37885, "loss": 0.0014, "lr": 1.3976146190966498e-06, "epoch": 2.1644450310149135, "percentage": 43.29, "elapsed_time": "0:24:00", "remaining_time": "0:31:26", "throughput": 5607.2, "total_tokens": 8075184} +{"current_steps": 16405, "total_steps": 37885, "loss": 0.0581, "lr": 1.3971918621937756e-06, "epoch": 2.165104922792662, "percentage": 43.3, "elapsed_time": "0:24:00", "remaining_time": "0:31:26", "throughput": 5607.48, "total_tokens": 8077424} +{"current_steps": 16410, "total_steps": 37885, "loss": 0.0001, "lr": 1.3967690209898046e-06, "epoch": 2.1657648145704105, "percentage": 43.32, "elapsed_time": "0:24:00", "remaining_time": "0:31:25", "throughput": 5608.02, "total_tokens": 8080048} +{"current_steps": 16415, "total_steps": 37885, "loss": 0.0478, "lr": 1.3963460955744824e-06, "epoch": 2.1664247063481588, "percentage": 43.33, "elapsed_time": "0:24:01", "remaining_time": "0:31:24", "throughput": 5608.39, "total_tokens": 8082416} +{"current_steps": 16420, "total_steps": 37885, "loss": 0.0001, "lr": 1.3959230860375716e-06, "epoch": 2.1670845981259075, "percentage": 43.34, "elapsed_time": "0:24:01", "remaining_time": "0:31:24", "throughput": 5608.97, "total_tokens": 8085104} +{"current_steps": 16425, "total_steps": 37885, "loss": 0.1084, "lr": 1.3954999924688522e-06, "epoch": 2.1677444899036558, "percentage": 43.35, "elapsed_time": "0:24:01", "remaining_time": "0:31:23", "throughput": 5609.29, "total_tokens": 8087408} +{"current_steps": 16430, "total_steps": 37885, "loss": 0.0489, "lr": 1.395076814958124e-06, "epoch": 2.1684043816814045, "percentage": 43.37, "elapsed_time": "0:24:02", "remaining_time": "0:31:23", "throughput": 5609.45, "total_tokens": 8089456} +{"current_steps": 16435, "total_steps": 37885, "loss": 0.0002, "lr": 1.3946535535952024e-06, "epoch": 2.1690642734591528, "percentage": 43.38, "elapsed_time": "0:24:02", "remaining_time": "0:31:22", "throughput": 5610.13, "total_tokens": 8092336} +{"current_steps": 16440, "total_steps": 37885, "loss": 0.0369, "lr": 1.394230208469922e-06, "epoch": 2.169724165236901, "percentage": 43.39, "elapsed_time": "0:24:02", "remaining_time": "0:31:22", "throughput": 5610.45, "total_tokens": 8094640} +{"current_steps": 16445, "total_steps": 37885, "loss": 0.0711, "lr": 1.3938067796721349e-06, "epoch": 2.1703840570146498, "percentage": 43.41, "elapsed_time": "0:24:03", "remaining_time": "0:31:21", "throughput": 5610.87, "total_tokens": 8097072} +{"current_steps": 16450, "total_steps": 37885, "loss": 0.0007, "lr": 1.3933832672917101e-06, "epoch": 2.171043948792398, "percentage": 43.42, "elapsed_time": "0:24:03", "remaining_time": "0:31:20", "throughput": 5611.27, "total_tokens": 8099504} +{"current_steps": 16455, "total_steps": 37885, "loss": 0.0004, "lr": 1.3929596714185357e-06, "epoch": 2.1717038405701463, "percentage": 43.43, "elapsed_time": "0:24:03", "remaining_time": "0:31:20", "throughput": 5611.5, "total_tokens": 8101680} +{"current_steps": 16460, "total_steps": 37885, "loss": 0.1802, "lr": 1.3925359921425166e-06, "epoch": 2.172363732347895, "percentage": 43.45, "elapsed_time": "0:24:04", "remaining_time": "0:31:19", "throughput": 5612.03, "total_tokens": 8104304} +{"current_steps": 16465, "total_steps": 37885, "loss": 0.0001, "lr": 1.3921122295535756e-06, "epoch": 2.1730236241256433, "percentage": 43.46, "elapsed_time": "0:24:04", "remaining_time": "0:31:19", "throughput": 5612.4, "total_tokens": 8106672} +{"current_steps": 16470, "total_steps": 37885, "loss": 0.091, "lr": 1.3916883837416536e-06, "epoch": 2.173683515903392, "percentage": 43.47, "elapsed_time": "0:24:04", "remaining_time": "0:31:18", "throughput": 5612.73, "total_tokens": 8108976} +{"current_steps": 16475, "total_steps": 37885, "loss": 0.004, "lr": 1.3912644547967085e-06, "epoch": 2.1743434076811403, "percentage": 43.49, "elapsed_time": "0:24:05", "remaining_time": "0:31:17", "throughput": 5613.12, "total_tokens": 8111408} +{"current_steps": 16480, "total_steps": 37885, "loss": 0.0458, "lr": 1.390840442808716e-06, "epoch": 2.1750032994588886, "percentage": 43.5, "elapsed_time": "0:24:05", "remaining_time": "0:31:17", "throughput": 5613.57, "total_tokens": 8113904} +{"current_steps": 16485, "total_steps": 37885, "loss": 0.1022, "lr": 1.3904163478676698e-06, "epoch": 2.1756631912366373, "percentage": 43.51, "elapsed_time": "0:24:05", "remaining_time": "0:31:16", "throughput": 5613.97, "total_tokens": 8116336} +{"current_steps": 16490, "total_steps": 37885, "loss": 0.0318, "lr": 1.3899921700635808e-06, "epoch": 2.1763230830143856, "percentage": 43.53, "elapsed_time": "0:24:06", "remaining_time": "0:31:16", "throughput": 5614.3, "total_tokens": 8118640} +{"current_steps": 16495, "total_steps": 37885, "loss": 0.0517, "lr": 1.389567909486478e-06, "epoch": 2.176982974792134, "percentage": 43.54, "elapsed_time": "0:24:06", "remaining_time": "0:31:15", "throughput": 5614.67, "total_tokens": 8121008} +{"current_steps": 16500, "total_steps": 37885, "loss": 0.065, "lr": 1.3891435662264077e-06, "epoch": 2.1776428665698826, "percentage": 43.55, "elapsed_time": "0:24:06", "remaining_time": "0:31:15", "throughput": 5615.19, "total_tokens": 8123632} +{"current_steps": 16505, "total_steps": 37885, "loss": 0.0667, "lr": 1.3887191403734328e-06, "epoch": 2.178302758347631, "percentage": 43.57, "elapsed_time": "0:24:07", "remaining_time": "0:31:14", "throughput": 5615.72, "total_tokens": 8126256} +{"current_steps": 16510, "total_steps": 37885, "loss": 0.0536, "lr": 1.3882946320176358e-06, "epoch": 2.1789626501253796, "percentage": 43.58, "elapsed_time": "0:24:07", "remaining_time": "0:31:13", "throughput": 5616.34, "total_tokens": 8129072} +{"current_steps": 16515, "total_steps": 37885, "loss": 0.0269, "lr": 1.3878700412491147e-06, "epoch": 2.179622541903128, "percentage": 43.59, "elapsed_time": "0:24:07", "remaining_time": "0:31:13", "throughput": 5616.83, "total_tokens": 8131632} +{"current_steps": 16520, "total_steps": 37885, "loss": 0.0001, "lr": 1.3874453681579861e-06, "epoch": 2.180282433680876, "percentage": 43.61, "elapsed_time": "0:24:08", "remaining_time": "0:31:12", "throughput": 5617.32, "total_tokens": 8134192} +{"current_steps": 16525, "total_steps": 37885, "loss": 0.0014, "lr": 1.3870206128343838e-06, "epoch": 2.180942325458625, "percentage": 43.62, "elapsed_time": "0:24:08", "remaining_time": "0:31:12", "throughput": 5617.59, "total_tokens": 8136432} +{"current_steps": 16530, "total_steps": 37885, "loss": 0.0003, "lr": 1.386595775368459e-06, "epoch": 2.181602217236373, "percentage": 43.63, "elapsed_time": "0:24:08", "remaining_time": "0:31:11", "throughput": 5617.79, "total_tokens": 8138544} +{"current_steps": 16535, "total_steps": 37885, "loss": 0.0551, "lr": 1.3861708558503804e-06, "epoch": 2.182262109014122, "percentage": 43.65, "elapsed_time": "0:24:09", "remaining_time": "0:31:10", "throughput": 5618.19, "total_tokens": 8140976} +{"current_steps": 16540, "total_steps": 37885, "loss": 0.0008, "lr": 1.385745854370334e-06, "epoch": 2.18292200079187, "percentage": 43.66, "elapsed_time": "0:24:09", "remaining_time": "0:31:10", "throughput": 5618.56, "total_tokens": 8143344} +{"current_steps": 16545, "total_steps": 37885, "loss": 0.0352, "lr": 1.3853207710185233e-06, "epoch": 2.1835818925696184, "percentage": 43.67, "elapsed_time": "0:24:09", "remaining_time": "0:31:09", "throughput": 5618.71, "total_tokens": 8145392} +{"current_steps": 16550, "total_steps": 37885, "loss": 0.0001, "lr": 1.3848956058851695e-06, "epoch": 2.184241784347367, "percentage": 43.68, "elapsed_time": "0:24:10", "remaining_time": "0:31:09", "throughput": 5619.27, "total_tokens": 8148080} +{"current_steps": 16555, "total_steps": 37885, "loss": 0.0695, "lr": 1.3844703590605105e-06, "epoch": 2.1849016761251154, "percentage": 43.7, "elapsed_time": "0:24:10", "remaining_time": "0:31:08", "throughput": 5619.63, "total_tokens": 8150448} +{"current_steps": 16560, "total_steps": 37885, "loss": 0.109, "lr": 1.3840450306348017e-06, "epoch": 2.185561567902864, "percentage": 43.71, "elapsed_time": "0:24:10", "remaining_time": "0:31:08", "throughput": 5620.03, "total_tokens": 8152880} +{"current_steps": 16565, "total_steps": 37885, "loss": 0.0972, "lr": 1.3836196206983162e-06, "epoch": 2.1862214596806124, "percentage": 43.72, "elapsed_time": "0:24:11", "remaining_time": "0:31:07", "throughput": 5620.4, "total_tokens": 8155248} +{"current_steps": 16570, "total_steps": 37885, "loss": 0.0592, "lr": 1.3831941293413443e-06, "epoch": 2.1868813514583607, "percentage": 43.74, "elapsed_time": "0:24:11", "remaining_time": "0:31:06", "throughput": 5620.88, "total_tokens": 8157808} +{"current_steps": 16575, "total_steps": 37885, "loss": 0.0009, "lr": 1.3827685566541934e-06, "epoch": 2.1875412432361094, "percentage": 43.75, "elapsed_time": "0:24:11", "remaining_time": "0:31:06", "throughput": 5621.36, "total_tokens": 8160368} +{"current_steps": 16580, "total_steps": 37885, "loss": 0.0492, "lr": 1.382342902727188e-06, "epoch": 2.1882011350138577, "percentage": 43.76, "elapsed_time": "0:24:11", "remaining_time": "0:31:05", "throughput": 5621.6, "total_tokens": 8162544} +{"current_steps": 16585, "total_steps": 37885, "loss": 0.0004, "lr": 1.38191716765067e-06, "epoch": 2.188861026791606, "percentage": 43.78, "elapsed_time": "0:24:12", "remaining_time": "0:31:05", "throughput": 5622.13, "total_tokens": 8165168} +{"current_steps": 16590, "total_steps": 37885, "loss": 0.0538, "lr": 1.381491351514999e-06, "epoch": 2.1895209185693547, "percentage": 43.79, "elapsed_time": "0:24:12", "remaining_time": "0:31:04", "throughput": 5622.45, "total_tokens": 8167472} +{"current_steps": 16595, "total_steps": 37885, "loss": 0.0013, "lr": 1.3810654544105512e-06, "epoch": 2.190180810347103, "percentage": 43.8, "elapsed_time": "0:24:12", "remaining_time": "0:31:04", "throughput": 5622.82, "total_tokens": 8169840} +{"current_steps": 16600, "total_steps": 37885, "loss": 0.0007, "lr": 1.38063947642772e-06, "epoch": 2.1908407021248517, "percentage": 43.82, "elapsed_time": "0:24:13", "remaining_time": "0:31:03", "throughput": 5623.21, "total_tokens": 8172272} +{"current_steps": 16605, "total_steps": 37885, "loss": 0.0001, "lr": 1.3802134176569166e-06, "epoch": 2.1915005939026, "percentage": 43.83, "elapsed_time": "0:24:13", "remaining_time": "0:31:02", "throughput": 5623.58, "total_tokens": 8174640} +{"current_steps": 16610, "total_steps": 37885, "loss": 0.0298, "lr": 1.3797872781885685e-06, "epoch": 2.1921604856803483, "percentage": 43.84, "elapsed_time": "0:24:13", "remaining_time": "0:31:02", "throughput": 5624.1, "total_tokens": 8177264} +{"current_steps": 16615, "total_steps": 37885, "loss": 0.0001, "lr": 1.3793610581131207e-06, "epoch": 2.192820377458097, "percentage": 43.86, "elapsed_time": "0:24:14", "remaining_time": "0:31:01", "throughput": 5624.38, "total_tokens": 8179504} +{"current_steps": 16620, "total_steps": 37885, "loss": 0.0782, "lr": 1.3789347575210352e-06, "epoch": 2.1934802692358453, "percentage": 43.87, "elapsed_time": "0:24:14", "remaining_time": "0:31:01", "throughput": 5624.95, "total_tokens": 8182192} +{"current_steps": 16625, "total_steps": 37885, "loss": 0.0031, "lr": 1.3785083765027919e-06, "epoch": 2.1941401610135935, "percentage": 43.88, "elapsed_time": "0:24:14", "remaining_time": "0:31:00", "throughput": 5625.27, "total_tokens": 8184496} +{"current_steps": 16630, "total_steps": 37885, "loss": 0.0002, "lr": 1.3780819151488865e-06, "epoch": 2.1948000527913423, "percentage": 43.9, "elapsed_time": "0:24:15", "remaining_time": "0:31:00", "throughput": 5625.64, "total_tokens": 8186864} +{"current_steps": 16635, "total_steps": 37885, "loss": 0.2403, "lr": 1.3776553735498321e-06, "epoch": 2.1954599445690905, "percentage": 43.91, "elapsed_time": "0:24:15", "remaining_time": "0:30:59", "throughput": 5625.96, "total_tokens": 8189168} +{"current_steps": 16640, "total_steps": 37885, "loss": 0.0315, "lr": 1.37722875179616e-06, "epoch": 2.1961198363468393, "percentage": 43.92, "elapsed_time": "0:24:15", "remaining_time": "0:30:58", "throughput": 5626.3, "total_tokens": 8191536} +{"current_steps": 16645, "total_steps": 37885, "loss": 0.0002, "lr": 1.3768020499784165e-06, "epoch": 2.1967797281245875, "percentage": 43.94, "elapsed_time": "0:24:16", "remaining_time": "0:30:58", "throughput": 5626.94, "total_tokens": 8194352} +{"current_steps": 16650, "total_steps": 37885, "loss": 0.0444, "lr": 1.3763752681871669e-06, "epoch": 2.197439619902336, "percentage": 43.95, "elapsed_time": "0:24:16", "remaining_time": "0:30:57", "throughput": 5627.35, "total_tokens": 8196784} +{"current_steps": 16655, "total_steps": 37885, "loss": 0.0195, "lr": 1.375948406512992e-06, "epoch": 2.1980995116800846, "percentage": 43.96, "elapsed_time": "0:24:16", "remaining_time": "0:30:57", "throughput": 5627.75, "total_tokens": 8199216} +{"current_steps": 16660, "total_steps": 37885, "loss": 0.071, "lr": 1.3755214650464903e-06, "epoch": 2.198759403457833, "percentage": 43.98, "elapsed_time": "0:24:17", "remaining_time": "0:30:56", "throughput": 5628.03, "total_tokens": 8201456} +{"current_steps": 16665, "total_steps": 37885, "loss": 0.0002, "lr": 1.3750944438782769e-06, "epoch": 2.1994192952355816, "percentage": 43.99, "elapsed_time": "0:24:17", "remaining_time": "0:30:55", "throughput": 5628.22, "total_tokens": 8203568} +{"current_steps": 16670, "total_steps": 37885, "loss": 0.0002, "lr": 1.374667343098984e-06, "epoch": 2.20007918701333, "percentage": 44.0, "elapsed_time": "0:24:17", "remaining_time": "0:30:55", "throughput": 5628.54, "total_tokens": 8205872} +{"current_steps": 16675, "total_steps": 37885, "loss": 0.0201, "lr": 1.3742401627992604e-06, "epoch": 2.200739078791078, "percentage": 44.01, "elapsed_time": "0:24:18", "remaining_time": "0:30:54", "throughput": 5629.03, "total_tokens": 8208432} +{"current_steps": 16680, "total_steps": 37885, "loss": 0.2684, "lr": 1.3738129030697724e-06, "epoch": 2.201398970568827, "percentage": 44.03, "elapsed_time": "0:24:18", "remaining_time": "0:30:54", "throughput": 5629.47, "total_tokens": 8210928} +{"current_steps": 16685, "total_steps": 37885, "loss": 0.0003, "lr": 1.3733855640012028e-06, "epoch": 2.202058862346575, "percentage": 44.04, "elapsed_time": "0:24:18", "remaining_time": "0:30:53", "throughput": 5629.75, "total_tokens": 8213168} +{"current_steps": 16690, "total_steps": 37885, "loss": 0.1604, "lr": 1.372958145684251e-06, "epoch": 2.202718754124324, "percentage": 44.05, "elapsed_time": "0:24:19", "remaining_time": "0:30:53", "throughput": 5630.1, "total_tokens": 8215536} +{"current_steps": 16695, "total_steps": 37885, "loss": 0.0022, "lr": 1.3725306482096337e-06, "epoch": 2.203378645902072, "percentage": 44.07, "elapsed_time": "0:24:19", "remaining_time": "0:30:52", "throughput": 5630.46, "total_tokens": 8217904} +{"current_steps": 16700, "total_steps": 37885, "loss": 0.0005, "lr": 1.3721030716680835e-06, "epoch": 2.2040385376798204, "percentage": 44.08, "elapsed_time": "0:24:19", "remaining_time": "0:30:51", "throughput": 5630.78, "total_tokens": 8220208} +{"current_steps": 16705, "total_steps": 37885, "loss": 0.0322, "lr": 1.3716754161503514e-06, "epoch": 2.204698429457569, "percentage": 44.09, "elapsed_time": "0:24:20", "remaining_time": "0:30:51", "throughput": 5631.29, "total_tokens": 8222832} +{"current_steps": 16710, "total_steps": 37885, "loss": 0.0001, "lr": 1.3712476817472037e-06, "epoch": 2.2053583212353174, "percentage": 44.11, "elapsed_time": "0:24:20", "remaining_time": "0:30:50", "throughput": 5631.69, "total_tokens": 8225264} +{"current_steps": 16715, "total_steps": 37885, "loss": 0.1097, "lr": 1.3708198685494234e-06, "epoch": 2.2060182130130657, "percentage": 44.12, "elapsed_time": "0:24:20", "remaining_time": "0:30:50", "throughput": 5632.05, "total_tokens": 8227632} +{"current_steps": 16720, "total_steps": 37885, "loss": 0.0618, "lr": 1.3703919766478116e-06, "epoch": 2.2066781047908144, "percentage": 44.13, "elapsed_time": "0:24:21", "remaining_time": "0:30:49", "throughput": 5632.69, "total_tokens": 8230448} +{"current_steps": 16725, "total_steps": 37885, "loss": 0.1022, "lr": 1.369964006133185e-06, "epoch": 2.2073379965685627, "percentage": 44.15, "elapsed_time": "0:24:21", "remaining_time": "0:30:49", "throughput": 5633.17, "total_tokens": 8233008} +{"current_steps": 16730, "total_steps": 37885, "loss": 0.0281, "lr": 1.3695359570963772e-06, "epoch": 2.2079978883463114, "percentage": 44.16, "elapsed_time": "0:24:21", "remaining_time": "0:30:48", "throughput": 5633.64, "total_tokens": 8235568} +{"current_steps": 16735, "total_steps": 37885, "loss": 0.0354, "lr": 1.3691078296282383e-06, "epoch": 2.2086577801240597, "percentage": 44.17, "elapsed_time": "0:24:22", "remaining_time": "0:30:47", "throughput": 5633.87, "total_tokens": 8237744} +{"current_steps": 16740, "total_steps": 37885, "loss": 0.0026, "lr": 1.3686796238196357e-06, "epoch": 2.209317671901808, "percentage": 44.19, "elapsed_time": "0:24:22", "remaining_time": "0:30:47", "throughput": 5634.39, "total_tokens": 8240368} +{"current_steps": 16745, "total_steps": 37885, "loss": 0.127, "lr": 1.3682513397614522e-06, "epoch": 2.2099775636795567, "percentage": 44.2, "elapsed_time": "0:24:22", "remaining_time": "0:30:46", "throughput": 5634.78, "total_tokens": 8242800} +{"current_steps": 16750, "total_steps": 37885, "loss": 0.0009, "lr": 1.367822977544589e-06, "epoch": 2.210637455457305, "percentage": 44.21, "elapsed_time": "0:24:23", "remaining_time": "0:30:46", "throughput": 5635.17, "total_tokens": 8245232} +{"current_steps": 16755, "total_steps": 37885, "loss": 0.0009, "lr": 1.3673945372599623e-06, "epoch": 2.2112973472350532, "percentage": 44.23, "elapsed_time": "0:24:23", "remaining_time": "0:30:45", "throughput": 5635.68, "total_tokens": 8247856} +{"current_steps": 16760, "total_steps": 37885, "loss": 0.0835, "lr": 1.366966018998505e-06, "epoch": 2.211957239012802, "percentage": 44.24, "elapsed_time": "0:24:23", "remaining_time": "0:30:45", "throughput": 5636.12, "total_tokens": 8250352} +{"current_steps": 16765, "total_steps": 37885, "loss": 0.0053, "lr": 1.3665374228511681e-06, "epoch": 2.2126171307905502, "percentage": 44.25, "elapsed_time": "0:24:24", "remaining_time": "0:30:44", "throughput": 5636.48, "total_tokens": 8252720} +{"current_steps": 16770, "total_steps": 37885, "loss": 0.0008, "lr": 1.366108748908917e-06, "epoch": 2.213277022568299, "percentage": 44.27, "elapsed_time": "0:24:24", "remaining_time": "0:30:43", "throughput": 5637.0, "total_tokens": 8255344} +{"current_steps": 16775, "total_steps": 37885, "loss": 0.0782, "lr": 1.3656799972627355e-06, "epoch": 2.2139369143460472, "percentage": 44.28, "elapsed_time": "0:24:24", "remaining_time": "0:30:43", "throughput": 5637.31, "total_tokens": 8257648} +{"current_steps": 16780, "total_steps": 37885, "loss": 0.0472, "lr": 1.3652511680036227e-06, "epoch": 2.2145968061237955, "percentage": 44.29, "elapsed_time": "0:24:25", "remaining_time": "0:30:42", "throughput": 5637.87, "total_tokens": 8260336} +{"current_steps": 16785, "total_steps": 37885, "loss": 0.0985, "lr": 1.3648222612225941e-06, "epoch": 2.2152566979015442, "percentage": 44.31, "elapsed_time": "0:24:25", "remaining_time": "0:30:42", "throughput": 5638.49, "total_tokens": 8263152} +{"current_steps": 16790, "total_steps": 37885, "loss": 0.0595, "lr": 1.3643932770106824e-06, "epoch": 2.2159165896792925, "percentage": 44.32, "elapsed_time": "0:24:25", "remaining_time": "0:30:41", "throughput": 5638.88, "total_tokens": 8265584} +{"current_steps": 16795, "total_steps": 37885, "loss": 0.0005, "lr": 1.3639642154589365e-06, "epoch": 2.2165764814570412, "percentage": 44.33, "elapsed_time": "0:24:26", "remaining_time": "0:30:41", "throughput": 5639.11, "total_tokens": 8267760} +{"current_steps": 16800, "total_steps": 37885, "loss": 0.0002, "lr": 1.3635350766584217e-06, "epoch": 2.2172363732347895, "percentage": 44.34, "elapsed_time": "0:24:26", "remaining_time": "0:30:40", "throughput": 5639.55, "total_tokens": 8270256} +{"current_steps": 16805, "total_steps": 37885, "loss": 0.0003, "lr": 1.363105860700219e-06, "epoch": 2.217896265012538, "percentage": 44.36, "elapsed_time": "0:24:26", "remaining_time": "0:30:39", "throughput": 5640.18, "total_tokens": 8273072} +{"current_steps": 16810, "total_steps": 37885, "loss": 0.194, "lr": 1.3626765676754274e-06, "epoch": 2.2185561567902865, "percentage": 44.37, "elapsed_time": "0:24:27", "remaining_time": "0:30:39", "throughput": 5640.49, "total_tokens": 8275376} +{"current_steps": 16815, "total_steps": 37885, "loss": 0.0007, "lr": 1.3622471976751599e-06, "epoch": 2.219216048568035, "percentage": 44.38, "elapsed_time": "0:24:27", "remaining_time": "0:30:38", "throughput": 5640.93, "total_tokens": 8277872} +{"current_steps": 16820, "total_steps": 37885, "loss": 0.0477, "lr": 1.3618177507905484e-06, "epoch": 2.2198759403457835, "percentage": 44.4, "elapsed_time": "0:24:27", "remaining_time": "0:30:38", "throughput": 5641.4, "total_tokens": 8280432} +{"current_steps": 16825, "total_steps": 37885, "loss": 0.0001, "lr": 1.361388227112739e-06, "epoch": 2.220535832123532, "percentage": 44.41, "elapsed_time": "0:24:28", "remaining_time": "0:30:37", "throughput": 5641.8, "total_tokens": 8282864} +{"current_steps": 16830, "total_steps": 37885, "loss": 0.0597, "lr": 1.3609586267328955e-06, "epoch": 2.22119572390128, "percentage": 44.42, "elapsed_time": "0:24:28", "remaining_time": "0:30:37", "throughput": 5642.23, "total_tokens": 8285360} +{"current_steps": 16835, "total_steps": 37885, "loss": 0.0002, "lr": 1.3605289497421974e-06, "epoch": 2.221855615679029, "percentage": 44.44, "elapsed_time": "0:24:28", "remaining_time": "0:30:36", "throughput": 5642.58, "total_tokens": 8287728} +{"current_steps": 16840, "total_steps": 37885, "loss": 0.1485, "lr": 1.3600991962318403e-06, "epoch": 2.222515507456777, "percentage": 44.45, "elapsed_time": "0:24:29", "remaining_time": "0:30:35", "throughput": 5643.06, "total_tokens": 8290288} +{"current_steps": 16845, "total_steps": 37885, "loss": 0.0736, "lr": 1.3596693662930365e-06, "epoch": 2.2231753992345253, "percentage": 44.46, "elapsed_time": "0:24:29", "remaining_time": "0:30:35", "throughput": 5643.45, "total_tokens": 8292720} +{"current_steps": 16850, "total_steps": 37885, "loss": 0.0003, "lr": 1.3592394600170142e-06, "epoch": 2.223835291012274, "percentage": 44.48, "elapsed_time": "0:24:29", "remaining_time": "0:30:34", "throughput": 5643.93, "total_tokens": 8295280} +{"current_steps": 16855, "total_steps": 37885, "loss": 0.0475, "lr": 1.3588094774950181e-06, "epoch": 2.2244951827900223, "percentage": 44.49, "elapsed_time": "0:24:30", "remaining_time": "0:30:34", "throughput": 5644.29, "total_tokens": 8297648} +{"current_steps": 16860, "total_steps": 37885, "loss": 0.0073, "lr": 1.3583794188183087e-06, "epoch": 2.225155074567771, "percentage": 44.5, "elapsed_time": "0:24:30", "remaining_time": "0:30:33", "throughput": 5644.65, "total_tokens": 8300016} +{"current_steps": 16865, "total_steps": 37885, "loss": 0.0011, "lr": 1.3579492840781625e-06, "epoch": 2.2258149663455193, "percentage": 44.52, "elapsed_time": "0:24:30", "remaining_time": "0:30:33", "throughput": 5645.09, "total_tokens": 8302512} +{"current_steps": 16870, "total_steps": 37885, "loss": 0.1018, "lr": 1.357519073365873e-06, "epoch": 2.2264748581232676, "percentage": 44.53, "elapsed_time": "0:24:31", "remaining_time": "0:30:32", "throughput": 5645.59, "total_tokens": 8305136} +{"current_steps": 16875, "total_steps": 37885, "loss": 0.0002, "lr": 1.357088786772749e-06, "epoch": 2.2271347499010163, "percentage": 44.54, "elapsed_time": "0:24:31", "remaining_time": "0:30:31", "throughput": 5646.06, "total_tokens": 8307696} +{"current_steps": 16880, "total_steps": 37885, "loss": 0.0004, "lr": 1.3566584243901163e-06, "epoch": 2.2277946416787646, "percentage": 44.56, "elapsed_time": "0:24:31", "remaining_time": "0:30:31", "throughput": 5646.36, "total_tokens": 8310000} +{"current_steps": 16885, "total_steps": 37885, "loss": 0.0002, "lr": 1.3562279863093154e-06, "epoch": 2.228454533456513, "percentage": 44.57, "elapsed_time": "0:24:32", "remaining_time": "0:30:30", "throughput": 5646.67, "total_tokens": 8312304} +{"current_steps": 16890, "total_steps": 37885, "loss": 0.0001, "lr": 1.3557974726217041e-06, "epoch": 2.2291144252342616, "percentage": 44.58, "elapsed_time": "0:24:32", "remaining_time": "0:30:30", "throughput": 5647.02, "total_tokens": 8314672} +{"current_steps": 16895, "total_steps": 37885, "loss": 0.0003, "lr": 1.3553668834186556e-06, "epoch": 2.22977431701201, "percentage": 44.6, "elapsed_time": "0:24:32", "remaining_time": "0:30:29", "throughput": 5647.45, "total_tokens": 8317168} +{"current_steps": 16900, "total_steps": 37885, "loss": 0.0642, "lr": 1.3549362187915593e-06, "epoch": 2.2304342087897586, "percentage": 44.61, "elapsed_time": "0:24:33", "remaining_time": "0:30:29", "throughput": 5647.97, "total_tokens": 8319792} +{"current_steps": 16905, "total_steps": 37885, "loss": 0.0002, "lr": 1.3545054788318212e-06, "epoch": 2.231094100567507, "percentage": 44.62, "elapsed_time": "0:24:33", "remaining_time": "0:30:28", "throughput": 5648.44, "total_tokens": 8322352} +{"current_steps": 16910, "total_steps": 37885, "loss": 0.1334, "lr": 1.3540746636308623e-06, "epoch": 2.231753992345255, "percentage": 44.64, "elapsed_time": "0:24:33", "remaining_time": "0:30:27", "throughput": 5648.88, "total_tokens": 8324848} +{"current_steps": 16915, "total_steps": 37885, "loss": 0.0002, "lr": 1.3536437732801198e-06, "epoch": 2.232413884123004, "percentage": 44.65, "elapsed_time": "0:24:34", "remaining_time": "0:30:27", "throughput": 5649.15, "total_tokens": 8327088} +{"current_steps": 16920, "total_steps": 37885, "loss": 0.0014, "lr": 1.3532128078710474e-06, "epoch": 2.233073775900752, "percentage": 44.66, "elapsed_time": "0:24:34", "remaining_time": "0:30:26", "throughput": 5649.66, "total_tokens": 8329712} +{"current_steps": 16925, "total_steps": 37885, "loss": 0.0088, "lr": 1.3527817674951143e-06, "epoch": 2.233733667678501, "percentage": 44.67, "elapsed_time": "0:24:34", "remaining_time": "0:30:26", "throughput": 5650.18, "total_tokens": 8332336} +{"current_steps": 16930, "total_steps": 37885, "loss": 0.0002, "lr": 1.3523506522438056e-06, "epoch": 2.234393559456249, "percentage": 44.69, "elapsed_time": "0:24:35", "remaining_time": "0:30:25", "throughput": 5650.49, "total_tokens": 8334640} +{"current_steps": 16935, "total_steps": 37885, "loss": 0.0535, "lr": 1.3519194622086227e-06, "epoch": 2.2350534512339975, "percentage": 44.7, "elapsed_time": "0:24:35", "remaining_time": "0:30:25", "throughput": 5650.89, "total_tokens": 8337072} +{"current_steps": 16940, "total_steps": 37885, "loss": 0.0806, "lr": 1.3514881974810823e-06, "epoch": 2.235713343011746, "percentage": 44.71, "elapsed_time": "0:24:35", "remaining_time": "0:30:24", "throughput": 5651.19, "total_tokens": 8339376} +{"current_steps": 16945, "total_steps": 37885, "loss": 0.1198, "lr": 1.3510568581527171e-06, "epoch": 2.2363732347894945, "percentage": 44.73, "elapsed_time": "0:24:36", "remaining_time": "0:30:24", "throughput": 5651.45, "total_tokens": 8341616} +{"current_steps": 16950, "total_steps": 37885, "loss": 0.0794, "lr": 1.3506254443150761e-06, "epoch": 2.237033126567243, "percentage": 44.74, "elapsed_time": "0:24:36", "remaining_time": "0:30:23", "throughput": 5651.93, "total_tokens": 8344176} +{"current_steps": 16955, "total_steps": 37885, "loss": 0.0985, "lr": 1.3501939560597233e-06, "epoch": 2.2376930183449915, "percentage": 44.75, "elapsed_time": "0:24:36", "remaining_time": "0:30:22", "throughput": 5652.32, "total_tokens": 8346608} +{"current_steps": 16960, "total_steps": 37885, "loss": 0.0123, "lr": 1.3497623934782397e-06, "epoch": 2.2383529101227397, "percentage": 44.77, "elapsed_time": "0:24:37", "remaining_time": "0:30:22", "throughput": 5652.95, "total_tokens": 8349424} +{"current_steps": 16965, "total_steps": 37885, "loss": 0.0019, "lr": 1.3493307566622204e-06, "epoch": 2.2390128019004885, "percentage": 44.78, "elapsed_time": "0:24:37", "remaining_time": "0:30:21", "throughput": 5653.26, "total_tokens": 8351728} +{"current_steps": 16970, "total_steps": 37885, "loss": 0.0007, "lr": 1.3488990457032778e-06, "epoch": 2.2396726936782367, "percentage": 44.79, "elapsed_time": "0:24:37", "remaining_time": "0:30:21", "throughput": 5653.65, "total_tokens": 8354160} +{"current_steps": 16975, "total_steps": 37885, "loss": 0.0096, "lr": 1.3484672606930393e-06, "epoch": 2.240332585455985, "percentage": 44.81, "elapsed_time": "0:24:37", "remaining_time": "0:30:20", "throughput": 5653.83, "total_tokens": 8356272} +{"current_steps": 16980, "total_steps": 37885, "loss": 0.0001, "lr": 1.3480354017231483e-06, "epoch": 2.2409924772337337, "percentage": 44.82, "elapsed_time": "0:24:38", "remaining_time": "0:30:20", "throughput": 5654.14, "total_tokens": 8358576} +{"current_steps": 16985, "total_steps": 37885, "loss": 0.0025, "lr": 1.3476034688852633e-06, "epoch": 2.241652369011482, "percentage": 44.83, "elapsed_time": "0:24:38", "remaining_time": "0:30:19", "throughput": 5654.53, "total_tokens": 8361008} +{"current_steps": 16990, "total_steps": 37885, "loss": 0.056, "lr": 1.3471714622710595e-06, "epoch": 2.2423122607892307, "percentage": 44.85, "elapsed_time": "0:24:38", "remaining_time": "0:30:18", "throughput": 5654.96, "total_tokens": 8363504} +{"current_steps": 16995, "total_steps": 37885, "loss": 0.0, "lr": 1.3467393819722265e-06, "epoch": 2.242972152566979, "percentage": 44.86, "elapsed_time": "0:24:39", "remaining_time": "0:30:18", "throughput": 5655.18, "total_tokens": 8365680} +{"current_steps": 17000, "total_steps": 37885, "loss": 0.0002, "lr": 1.3463072280804708e-06, "epoch": 2.2436320443447273, "percentage": 44.87, "elapsed_time": "0:24:39", "remaining_time": "0:30:17", "throughput": 5655.76, "total_tokens": 8368432} +{"current_steps": 17005, "total_steps": 37885, "loss": 0.0004, "lr": 1.3458750006875134e-06, "epoch": 2.244291936122476, "percentage": 44.89, "elapsed_time": "0:24:39", "remaining_time": "0:30:17", "throughput": 5656.08, "total_tokens": 8370736} +{"current_steps": 17010, "total_steps": 37885, "loss": 0.0002, "lr": 1.3454426998850919e-06, "epoch": 2.2449518279002243, "percentage": 44.9, "elapsed_time": "0:24:40", "remaining_time": "0:30:16", "throughput": 5656.66, "total_tokens": 8373488} +{"current_steps": 17015, "total_steps": 37885, "loss": 0.0001, "lr": 1.345010325764959e-06, "epoch": 2.245611719677973, "percentage": 44.91, "elapsed_time": "0:24:40", "remaining_time": "0:30:16", "throughput": 5657.09, "total_tokens": 8375984} +{"current_steps": 17020, "total_steps": 37885, "loss": 0.0002, "lr": 1.3445778784188828e-06, "epoch": 2.2462716114557213, "percentage": 44.93, "elapsed_time": "0:24:40", "remaining_time": "0:30:15", "throughput": 5657.52, "total_tokens": 8378480} +{"current_steps": 17025, "total_steps": 37885, "loss": 0.0004, "lr": 1.3441453579386468e-06, "epoch": 2.2469315032334696, "percentage": 44.94, "elapsed_time": "0:24:41", "remaining_time": "0:30:14", "throughput": 5657.73, "total_tokens": 8380656} +{"current_steps": 17030, "total_steps": 37885, "loss": 0.0447, "lr": 1.343712764416051e-06, "epoch": 2.2475913950112183, "percentage": 44.95, "elapsed_time": "0:24:41", "remaining_time": "0:30:14", "throughput": 5658.31, "total_tokens": 8383408} +{"current_steps": 17035, "total_steps": 37885, "loss": 0.1017, "lr": 1.3432800979429097e-06, "epoch": 2.2482512867889666, "percentage": 44.97, "elapsed_time": "0:24:41", "remaining_time": "0:30:13", "throughput": 5658.73, "total_tokens": 8385904} +{"current_steps": 17040, "total_steps": 37885, "loss": 0.0847, "lr": 1.3428473586110537e-06, "epoch": 2.248911178566715, "percentage": 44.98, "elapsed_time": "0:24:42", "remaining_time": "0:30:13", "throughput": 5659.17, "total_tokens": 8388400} +{"current_steps": 17045, "total_steps": 37885, "loss": 0.0001, "lr": 1.3424145465123286e-06, "epoch": 2.2495710703444636, "percentage": 44.99, "elapsed_time": "0:24:42", "remaining_time": "0:30:12", "throughput": 5659.44, "total_tokens": 8390640} +{"current_steps": 17050, "total_steps": 37885, "loss": 0.0, "lr": 1.3419816617385953e-06, "epoch": 2.250230962122212, "percentage": 45.0, "elapsed_time": "0:24:42", "remaining_time": "0:30:12", "throughput": 5659.89, "total_tokens": 8393200} +{"current_steps": 17055, "total_steps": 37885, "loss": 0.0556, "lr": 1.3415487043817311e-06, "epoch": 2.2508908538999606, "percentage": 45.02, "elapsed_time": "0:24:43", "remaining_time": "0:30:11", "throughput": 5660.28, "total_tokens": 8395632} +{"current_steps": 17055, "total_steps": 37885, "eval_loss": 0.1500292271375656, "epoch": 2.2508908538999606, "percentage": 45.02, "elapsed_time": "0:24:51", "remaining_time": "0:30:21", "throughput": 5630.56, "total_tokens": 8395632} +{"current_steps": 17060, "total_steps": 37885, "loss": 0.0, "lr": 1.3411156745336272e-06, "epoch": 2.251550745677709, "percentage": 45.03, "elapsed_time": "0:25:28", "remaining_time": "0:31:05", "throughput": 5495.23, "total_tokens": 8397872} +{"current_steps": 17065, "total_steps": 37885, "loss": 0.0002, "lr": 1.3406825722861921e-06, "epoch": 2.252210637455457, "percentage": 45.04, "elapsed_time": "0:25:28", "remaining_time": "0:31:04", "throughput": 5495.71, "total_tokens": 8400432} +{"current_steps": 17070, "total_steps": 37885, "loss": 0.0627, "lr": 1.3402493977313476e-06, "epoch": 2.252870529233206, "percentage": 45.06, "elapsed_time": "0:25:28", "remaining_time": "0:31:04", "throughput": 5495.94, "total_tokens": 8402608} +{"current_steps": 17075, "total_steps": 37885, "loss": 0.0002, "lr": 1.3398161509610324e-06, "epoch": 2.253530421010954, "percentage": 45.07, "elapsed_time": "0:25:29", "remaining_time": "0:31:03", "throughput": 5496.21, "total_tokens": 8404848} +{"current_steps": 17080, "total_steps": 37885, "loss": 0.0001, "lr": 1.3393828320672e-06, "epoch": 2.254190312788703, "percentage": 45.08, "elapsed_time": "0:25:29", "remaining_time": "0:31:03", "throughput": 5496.54, "total_tokens": 8407216} +{"current_steps": 17085, "total_steps": 37885, "loss": 0.0001, "lr": 1.3389494411418192e-06, "epoch": 2.254850204566451, "percentage": 45.1, "elapsed_time": "0:25:29", "remaining_time": "0:31:02", "throughput": 5496.91, "total_tokens": 8409648} +{"current_steps": 17090, "total_steps": 37885, "loss": 0.0001, "lr": 1.3385159782768738e-06, "epoch": 2.2555100963441994, "percentage": 45.11, "elapsed_time": "0:25:30", "remaining_time": "0:31:01", "throughput": 5497.26, "total_tokens": 8412016} +{"current_steps": 17095, "total_steps": 37885, "loss": 0.0763, "lr": 1.3380824435643633e-06, "epoch": 2.256169988121948, "percentage": 45.12, "elapsed_time": "0:25:30", "remaining_time": "0:31:01", "throughput": 5497.62, "total_tokens": 8414448} +{"current_steps": 17100, "total_steps": 37885, "loss": 0.1161, "lr": 1.3376488370963027e-06, "epoch": 2.2568298798996964, "percentage": 45.14, "elapsed_time": "0:25:30", "remaining_time": "0:31:00", "throughput": 5497.91, "total_tokens": 8416752} +{"current_steps": 17105, "total_steps": 37885, "loss": 0.0001, "lr": 1.3372151589647212e-06, "epoch": 2.257489771677445, "percentage": 45.15, "elapsed_time": "0:25:31", "remaining_time": "0:31:00", "throughput": 5498.24, "total_tokens": 8419120} +{"current_steps": 17110, "total_steps": 37885, "loss": 0.0876, "lr": 1.3367814092616642e-06, "epoch": 2.2581496634551934, "percentage": 45.16, "elapsed_time": "0:25:31", "remaining_time": "0:30:59", "throughput": 5498.45, "total_tokens": 8421296} +{"current_steps": 17115, "total_steps": 37885, "loss": 0.0389, "lr": 1.336347588079192e-06, "epoch": 2.2588095552329417, "percentage": 45.18, "elapsed_time": "0:25:31", "remaining_time": "0:30:59", "throughput": 5498.69, "total_tokens": 8423536} +{"current_steps": 17120, "total_steps": 37885, "loss": 0.0001, "lr": 1.3359136955093798e-06, "epoch": 2.2594694470106904, "percentage": 45.19, "elapsed_time": "0:25:32", "remaining_time": "0:30:58", "throughput": 5499.15, "total_tokens": 8426096} +{"current_steps": 17125, "total_steps": 37885, "loss": 0.0383, "lr": 1.335479731644318e-06, "epoch": 2.2601293387884387, "percentage": 45.2, "elapsed_time": "0:25:32", "remaining_time": "0:30:57", "throughput": 5499.5, "total_tokens": 8428464} +{"current_steps": 17130, "total_steps": 37885, "loss": 0.0004, "lr": 1.3350456965761127e-06, "epoch": 2.260789230566187, "percentage": 45.22, "elapsed_time": "0:25:32", "remaining_time": "0:30:57", "throughput": 5499.99, "total_tokens": 8431088} +{"current_steps": 17135, "total_steps": 37885, "loss": 0.0001, "lr": 1.3346115903968845e-06, "epoch": 2.2614491223439357, "percentage": 45.23, "elapsed_time": "0:25:33", "remaining_time": "0:30:56", "throughput": 5500.25, "total_tokens": 8433328} +{"current_steps": 17140, "total_steps": 37885, "loss": 0.0002, "lr": 1.3341774131987694e-06, "epoch": 2.262109014121684, "percentage": 45.24, "elapsed_time": "0:25:33", "remaining_time": "0:30:56", "throughput": 5500.62, "total_tokens": 8435760} +{"current_steps": 17145, "total_steps": 37885, "loss": 0.0319, "lr": 1.333743165073918e-06, "epoch": 2.2627689058994322, "percentage": 45.26, "elapsed_time": "0:25:33", "remaining_time": "0:30:55", "throughput": 5500.84, "total_tokens": 8437936} +{"current_steps": 17150, "total_steps": 37885, "loss": 0.0011, "lr": 1.3333088461144967e-06, "epoch": 2.263428797677181, "percentage": 45.27, "elapsed_time": "0:25:34", "remaining_time": "0:30:54", "throughput": 5501.28, "total_tokens": 8440496} +{"current_steps": 17155, "total_steps": 37885, "loss": 0.0517, "lr": 1.3328744564126868e-06, "epoch": 2.2640886894549292, "percentage": 45.28, "elapsed_time": "0:25:34", "remaining_time": "0:30:54", "throughput": 5501.51, "total_tokens": 8442736} +{"current_steps": 17160, "total_steps": 37885, "loss": 0.1567, "lr": 1.3324399960606835e-06, "epoch": 2.264748581232678, "percentage": 45.29, "elapsed_time": "0:25:34", "remaining_time": "0:30:53", "throughput": 5502.03, "total_tokens": 8445424} +{"current_steps": 17165, "total_steps": 37885, "loss": 0.0549, "lr": 1.3320054651506985e-06, "epoch": 2.2654084730104262, "percentage": 45.31, "elapsed_time": "0:25:35", "remaining_time": "0:30:53", "throughput": 5502.53, "total_tokens": 8448048} +{"current_steps": 17170, "total_steps": 37885, "loss": 0.0001, "lr": 1.331570863774958e-06, "epoch": 2.2660683647881745, "percentage": 45.32, "elapsed_time": "0:25:35", "remaining_time": "0:30:52", "throughput": 5502.79, "total_tokens": 8450288} +{"current_steps": 17175, "total_steps": 37885, "loss": 0.0566, "lr": 1.3311361920257024e-06, "epoch": 2.2667282565659232, "percentage": 45.33, "elapsed_time": "0:25:35", "remaining_time": "0:30:52", "throughput": 5503.09, "total_tokens": 8452592} +{"current_steps": 17180, "total_steps": 37885, "loss": 0.0006, "lr": 1.3307014499951882e-06, "epoch": 2.2673881483436715, "percentage": 45.35, "elapsed_time": "0:25:36", "remaining_time": "0:30:51", "throughput": 5503.42, "total_tokens": 8454960} +{"current_steps": 17185, "total_steps": 37885, "loss": 0.0003, "lr": 1.3302666377756859e-06, "epoch": 2.2680480401214203, "percentage": 45.36, "elapsed_time": "0:25:36", "remaining_time": "0:30:50", "throughput": 5503.76, "total_tokens": 8457328} +{"current_steps": 17190, "total_steps": 37885, "loss": 0.083, "lr": 1.3298317554594813e-06, "epoch": 2.2687079318991685, "percentage": 45.37, "elapsed_time": "0:25:36", "remaining_time": "0:30:50", "throughput": 5504.18, "total_tokens": 8459824} +{"current_steps": 17195, "total_steps": 37885, "loss": 0.0001, "lr": 1.3293968031388752e-06, "epoch": 2.269367823676917, "percentage": 45.39, "elapsed_time": "0:25:37", "remaining_time": "0:30:49", "throughput": 5504.66, "total_tokens": 8462448} +{"current_steps": 17200, "total_steps": 37885, "loss": 0.1421, "lr": 1.3289617809061827e-06, "epoch": 2.2700277154546655, "percentage": 45.4, "elapsed_time": "0:25:37", "remaining_time": "0:30:49", "throughput": 5504.96, "total_tokens": 8464752} +{"current_steps": 17205, "total_steps": 37885, "loss": 0.0642, "lr": 1.3285266888537346e-06, "epoch": 2.270687607232414, "percentage": 45.41, "elapsed_time": "0:25:37", "remaining_time": "0:30:48", "throughput": 5505.34, "total_tokens": 8467184} +{"current_steps": 17210, "total_steps": 37885, "loss": 0.1786, "lr": 1.3280915270738754e-06, "epoch": 2.2713474990101625, "percentage": 45.43, "elapsed_time": "0:25:38", "remaining_time": "0:30:48", "throughput": 5505.76, "total_tokens": 8469680} +{"current_steps": 17215, "total_steps": 37885, "loss": 0.0017, "lr": 1.3276562956589656e-06, "epoch": 2.272007390787911, "percentage": 45.44, "elapsed_time": "0:25:38", "remaining_time": "0:30:47", "throughput": 5506.01, "total_tokens": 8471920} +{"current_steps": 17220, "total_steps": 37885, "loss": 0.0002, "lr": 1.32722099470138e-06, "epoch": 2.272667282565659, "percentage": 45.45, "elapsed_time": "0:25:39", "remaining_time": "0:30:46", "throughput": 5506.53, "total_tokens": 8474608} +{"current_steps": 17225, "total_steps": 37885, "loss": 0.0253, "lr": 1.3267856242935076e-06, "epoch": 2.273327174343408, "percentage": 45.47, "elapsed_time": "0:25:39", "remaining_time": "0:30:46", "throughput": 5506.77, "total_tokens": 8476848} +{"current_steps": 17230, "total_steps": 37885, "loss": 0.0448, "lr": 1.3263501845277528e-06, "epoch": 2.273987066121156, "percentage": 45.48, "elapsed_time": "0:25:39", "remaining_time": "0:30:45", "throughput": 5507.14, "total_tokens": 8479280} +{"current_steps": 17235, "total_steps": 37885, "loss": 0.0008, "lr": 1.3259146754965346e-06, "epoch": 2.274646957898905, "percentage": 45.49, "elapsed_time": "0:25:40", "remaining_time": "0:30:45", "throughput": 5507.57, "total_tokens": 8481776} +{"current_steps": 17240, "total_steps": 37885, "loss": 0.0031, "lr": 1.3254790972922867e-06, "epoch": 2.275306849676653, "percentage": 45.51, "elapsed_time": "0:25:40", "remaining_time": "0:30:44", "throughput": 5507.98, "total_tokens": 8484208} +{"current_steps": 17245, "total_steps": 37885, "loss": 0.047, "lr": 1.3250434500074574e-06, "epoch": 2.2759667414544014, "percentage": 45.52, "elapsed_time": "0:25:40", "remaining_time": "0:30:43", "throughput": 5508.48, "total_tokens": 8486832} +{"current_steps": 17250, "total_steps": 37885, "loss": 0.0611, "lr": 1.3246077337345097e-06, "epoch": 2.27662663323215, "percentage": 45.53, "elapsed_time": "0:25:41", "remaining_time": "0:30:43", "throughput": 5508.91, "total_tokens": 8489328} +{"current_steps": 17255, "total_steps": 37885, "loss": 0.0427, "lr": 1.3241719485659206e-06, "epoch": 2.2772865250098984, "percentage": 45.55, "elapsed_time": "0:25:41", "remaining_time": "0:30:42", "throughput": 5509.28, "total_tokens": 8491696} +{"current_steps": 17260, "total_steps": 37885, "loss": 0.0004, "lr": 1.3237360945941834e-06, "epoch": 2.2779464167876466, "percentage": 45.56, "elapsed_time": "0:25:41", "remaining_time": "0:30:42", "throughput": 5509.8, "total_tokens": 8494320} +{"current_steps": 17265, "total_steps": 37885, "loss": 0.0442, "lr": 1.3233001719118043e-06, "epoch": 2.2786063085653954, "percentage": 45.57, "elapsed_time": "0:25:42", "remaining_time": "0:30:41", "throughput": 5510.08, "total_tokens": 8496560} +{"current_steps": 17270, "total_steps": 37885, "loss": 0.1099, "lr": 1.3228641806113047e-06, "epoch": 2.2792662003431436, "percentage": 45.59, "elapsed_time": "0:25:42", "remaining_time": "0:30:41", "throughput": 5510.46, "total_tokens": 8498928} +{"current_steps": 17275, "total_steps": 37885, "loss": 0.0006, "lr": 1.3224281207852213e-06, "epoch": 2.2799260921208924, "percentage": 45.6, "elapsed_time": "0:25:42", "remaining_time": "0:30:40", "throughput": 5510.97, "total_tokens": 8501552} +{"current_steps": 17280, "total_steps": 37885, "loss": 0.0854, "lr": 1.3219919925261034e-06, "epoch": 2.2805859838986406, "percentage": 45.61, "elapsed_time": "0:25:42", "remaining_time": "0:30:39", "throughput": 5511.26, "total_tokens": 8503792} +{"current_steps": 17285, "total_steps": 37885, "loss": 0.0475, "lr": 1.321555795926517e-06, "epoch": 2.281245875676389, "percentage": 45.62, "elapsed_time": "0:25:43", "remaining_time": "0:30:39", "throughput": 5511.38, "total_tokens": 8505776} +{"current_steps": 17290, "total_steps": 37885, "loss": 0.0001, "lr": 1.3211195310790415e-06, "epoch": 2.2819057674541376, "percentage": 45.64, "elapsed_time": "0:25:43", "remaining_time": "0:30:38", "throughput": 5511.82, "total_tokens": 8508272} +{"current_steps": 17295, "total_steps": 37885, "loss": 0.0002, "lr": 1.3206831980762712e-06, "epoch": 2.282565659231886, "percentage": 45.65, "elapsed_time": "0:25:43", "remaining_time": "0:30:38", "throughput": 5512.26, "total_tokens": 8510768} +{"current_steps": 17300, "total_steps": 37885, "loss": 0.0001, "lr": 1.320246797010814e-06, "epoch": 2.283225551009634, "percentage": 45.66, "elapsed_time": "0:25:44", "remaining_time": "0:30:37", "throughput": 5512.85, "total_tokens": 8513520} +{"current_steps": 17305, "total_steps": 37885, "loss": 0.0009, "lr": 1.319810327975293e-06, "epoch": 2.283885442787383, "percentage": 45.68, "elapsed_time": "0:25:44", "remaining_time": "0:30:36", "throughput": 5513.33, "total_tokens": 8516080} +{"current_steps": 17310, "total_steps": 37885, "loss": 0.0675, "lr": 1.3193737910623462e-06, "epoch": 2.284545334565131, "percentage": 45.69, "elapsed_time": "0:25:44", "remaining_time": "0:30:36", "throughput": 5513.7, "total_tokens": 8518448} +{"current_steps": 17315, "total_steps": 37885, "loss": 0.0, "lr": 1.3189371863646246e-06, "epoch": 2.28520522634288, "percentage": 45.7, "elapsed_time": "0:25:45", "remaining_time": "0:30:35", "throughput": 5513.94, "total_tokens": 8520624} +{"current_steps": 17320, "total_steps": 37885, "loss": 0.1649, "lr": 1.318500513974795e-06, "epoch": 2.285865118120628, "percentage": 45.72, "elapsed_time": "0:25:45", "remaining_time": "0:30:35", "throughput": 5514.45, "total_tokens": 8523248} +{"current_steps": 17325, "total_steps": 37885, "loss": 0.0001, "lr": 1.3180637739855376e-06, "epoch": 2.2865250098983765, "percentage": 45.73, "elapsed_time": "0:25:45", "remaining_time": "0:30:34", "throughput": 5514.76, "total_tokens": 8525552} +{"current_steps": 17330, "total_steps": 37885, "loss": 0.0008, "lr": 1.3176269664895476e-06, "epoch": 2.287184901676125, "percentage": 45.74, "elapsed_time": "0:25:46", "remaining_time": "0:30:34", "throughput": 5515.18, "total_tokens": 8528048} +{"current_steps": 17335, "total_steps": 37885, "loss": 0.0557, "lr": 1.3171900915795338e-06, "epoch": 2.2878447934538735, "percentage": 45.76, "elapsed_time": "0:25:46", "remaining_time": "0:30:33", "throughput": 5515.58, "total_tokens": 8530480} +{"current_steps": 17340, "total_steps": 37885, "loss": 0.0002, "lr": 1.31675314934822e-06, "epoch": 2.288504685231622, "percentage": 45.77, "elapsed_time": "0:25:46", "remaining_time": "0:30:32", "throughput": 5516.05, "total_tokens": 8533040} +{"current_steps": 17345, "total_steps": 37885, "loss": 0.1025, "lr": 1.316316139888344e-06, "epoch": 2.2891645770093705, "percentage": 45.78, "elapsed_time": "0:25:47", "remaining_time": "0:30:32", "throughput": 5516.49, "total_tokens": 8535536} +{"current_steps": 17350, "total_steps": 37885, "loss": 0.0009, "lr": 1.3158790632926579e-06, "epoch": 2.2898244687871188, "percentage": 45.8, "elapsed_time": "0:25:47", "remaining_time": "0:30:31", "throughput": 5516.93, "total_tokens": 8538032} +{"current_steps": 17355, "total_steps": 37885, "loss": 0.0595, "lr": 1.3154419196539281e-06, "epoch": 2.2904843605648675, "percentage": 45.81, "elapsed_time": "0:25:47", "remaining_time": "0:30:31", "throughput": 5517.36, "total_tokens": 8540528} +{"current_steps": 17360, "total_steps": 37885, "loss": 0.0001, "lr": 1.315004709064935e-06, "epoch": 2.2911442523426158, "percentage": 45.82, "elapsed_time": "0:25:48", "remaining_time": "0:30:30", "throughput": 5517.8, "total_tokens": 8543024} +{"current_steps": 17365, "total_steps": 37885, "loss": 0.0565, "lr": 1.3145674316184736e-06, "epoch": 2.2918041441203645, "percentage": 45.84, "elapsed_time": "0:25:48", "remaining_time": "0:30:29", "throughput": 5518.22, "total_tokens": 8545520} +{"current_steps": 17370, "total_steps": 37885, "loss": 0.0457, "lr": 1.3141300874073524e-06, "epoch": 2.2924640358981128, "percentage": 45.85, "elapsed_time": "0:25:48", "remaining_time": "0:30:29", "throughput": 5518.65, "total_tokens": 8548016} +{"current_steps": 17375, "total_steps": 37885, "loss": 0.0002, "lr": 1.3136926765243955e-06, "epoch": 2.293123927675861, "percentage": 45.86, "elapsed_time": "0:25:49", "remaining_time": "0:30:28", "throughput": 5519.07, "total_tokens": 8550512} +{"current_steps": 17380, "total_steps": 37885, "loss": 0.055, "lr": 1.3132551990624392e-06, "epoch": 2.2937838194536098, "percentage": 45.88, "elapsed_time": "0:25:49", "remaining_time": "0:30:28", "throughput": 5519.38, "total_tokens": 8552816} +{"current_steps": 17385, "total_steps": 37885, "loss": 0.06, "lr": 1.3128176551143352e-06, "epoch": 2.294443711231358, "percentage": 45.89, "elapsed_time": "0:25:49", "remaining_time": "0:30:27", "throughput": 5519.81, "total_tokens": 8555312} +{"current_steps": 17390, "total_steps": 37885, "loss": 0.0493, "lr": 1.3123800447729497e-06, "epoch": 2.2951036030091063, "percentage": 45.9, "elapsed_time": "0:25:50", "remaining_time": "0:30:27", "throughput": 5520.08, "total_tokens": 8557552} +{"current_steps": 17395, "total_steps": 37885, "loss": 0.0001, "lr": 1.3119423681311612e-06, "epoch": 2.295763494786855, "percentage": 45.92, "elapsed_time": "0:25:50", "remaining_time": "0:30:26", "throughput": 5520.45, "total_tokens": 8559920} +{"current_steps": 17400, "total_steps": 37885, "loss": 0.0001, "lr": 1.3115046252818644e-06, "epoch": 2.2964233865646033, "percentage": 45.93, "elapsed_time": "0:25:50", "remaining_time": "0:30:25", "throughput": 5520.94, "total_tokens": 8562544} +{"current_steps": 17405, "total_steps": 37885, "loss": 0.0001, "lr": 1.3110668163179664e-06, "epoch": 2.297083278342352, "percentage": 45.94, "elapsed_time": "0:25:51", "remaining_time": "0:30:25", "throughput": 5521.43, "total_tokens": 8565168} +{"current_steps": 17410, "total_steps": 37885, "loss": 0.0382, "lr": 1.3106289413323891e-06, "epoch": 2.2977431701201003, "percentage": 45.95, "elapsed_time": "0:25:51", "remaining_time": "0:30:24", "throughput": 5521.88, "total_tokens": 8567664} +{"current_steps": 17415, "total_steps": 37885, "loss": 0.006, "lr": 1.3101910004180685e-06, "epoch": 2.2984030618978486, "percentage": 45.97, "elapsed_time": "0:25:51", "remaining_time": "0:30:24", "throughput": 5522.06, "total_tokens": 8569776} +{"current_steps": 17420, "total_steps": 37885, "loss": 0.0004, "lr": 1.3097529936679545e-06, "epoch": 2.2990629536755973, "percentage": 45.98, "elapsed_time": "0:25:52", "remaining_time": "0:30:23", "throughput": 5522.29, "total_tokens": 8571952} +{"current_steps": 17425, "total_steps": 37885, "loss": 0.0001, "lr": 1.3093149211750105e-06, "epoch": 2.2997228454533456, "percentage": 45.99, "elapsed_time": "0:25:52", "remaining_time": "0:30:22", "throughput": 5522.68, "total_tokens": 8574384} +{"current_steps": 17430, "total_steps": 37885, "loss": 0.0007, "lr": 1.3088767830322142e-06, "epoch": 2.300382737231094, "percentage": 46.01, "elapsed_time": "0:25:52", "remaining_time": "0:30:22", "throughput": 5523.08, "total_tokens": 8576816} +{"current_steps": 17435, "total_steps": 37885, "loss": 0.0389, "lr": 1.3084385793325575e-06, "epoch": 2.3010426290088426, "percentage": 46.02, "elapsed_time": "0:25:53", "remaining_time": "0:30:21", "throughput": 5523.43, "total_tokens": 8579184} +{"current_steps": 17440, "total_steps": 37885, "loss": 0.0005, "lr": 1.308000310169046e-06, "epoch": 2.301702520786591, "percentage": 46.03, "elapsed_time": "0:25:53", "remaining_time": "0:30:21", "throughput": 5523.83, "total_tokens": 8581616} +{"current_steps": 17445, "total_steps": 37885, "loss": 0.0001, "lr": 1.307561975634699e-06, "epoch": 2.3023624125643396, "percentage": 46.05, "elapsed_time": "0:25:53", "remaining_time": "0:30:20", "throughput": 5524.23, "total_tokens": 8584048} +{"current_steps": 17450, "total_steps": 37885, "loss": 0.0, "lr": 1.3071235758225497e-06, "epoch": 2.303022304342088, "percentage": 46.06, "elapsed_time": "0:25:54", "remaining_time": "0:30:20", "throughput": 5524.51, "total_tokens": 8586288} +{"current_steps": 17455, "total_steps": 37885, "loss": 0.0002, "lr": 1.3066851108256457e-06, "epoch": 2.303682196119836, "percentage": 46.07, "elapsed_time": "0:25:54", "remaining_time": "0:30:19", "throughput": 5524.94, "total_tokens": 8588784} +{"current_steps": 17460, "total_steps": 37885, "loss": 0.0577, "lr": 1.3062465807370475e-06, "epoch": 2.304342087897585, "percentage": 46.09, "elapsed_time": "0:25:54", "remaining_time": "0:30:18", "throughput": 5525.32, "total_tokens": 8591216} +{"current_steps": 17465, "total_steps": 37885, "loss": 0.1142, "lr": 1.3058079856498302e-06, "epoch": 2.305001979675333, "percentage": 46.1, "elapsed_time": "0:25:55", "remaining_time": "0:30:18", "throughput": 5525.85, "total_tokens": 8593904} +{"current_steps": 17470, "total_steps": 37885, "loss": 0.0, "lr": 1.3053693256570829e-06, "epoch": 2.305661871453082, "percentage": 46.11, "elapsed_time": "0:25:55", "remaining_time": "0:30:17", "throughput": 5526.16, "total_tokens": 8596208} +{"current_steps": 17475, "total_steps": 37885, "loss": 0.085, "lr": 1.304930600851907e-06, "epoch": 2.30632176323083, "percentage": 46.13, "elapsed_time": "0:25:55", "remaining_time": "0:30:17", "throughput": 5526.61, "total_tokens": 8598768} +{"current_steps": 17480, "total_steps": 37885, "loss": 0.0002, "lr": 1.3044918113274195e-06, "epoch": 2.3069816550085784, "percentage": 46.14, "elapsed_time": "0:25:56", "remaining_time": "0:30:16", "throughput": 5526.88, "total_tokens": 8601008} +{"current_steps": 17485, "total_steps": 37885, "loss": 0.127, "lr": 1.3040529571767498e-06, "epoch": 2.307641546786327, "percentage": 46.15, "elapsed_time": "0:25:56", "remaining_time": "0:30:16", "throughput": 5527.38, "total_tokens": 8603632} +{"current_steps": 17490, "total_steps": 37885, "loss": 0.0004, "lr": 1.3036140384930416e-06, "epoch": 2.3083014385640754, "percentage": 46.17, "elapsed_time": "0:25:56", "remaining_time": "0:30:15", "throughput": 5527.65, "total_tokens": 8605872} +{"current_steps": 17495, "total_steps": 37885, "loss": 0.0001, "lr": 1.3031750553694528e-06, "epoch": 2.308961330341824, "percentage": 46.18, "elapsed_time": "0:25:57", "remaining_time": "0:30:14", "throughput": 5528.12, "total_tokens": 8608432} +{"current_steps": 17500, "total_steps": 37885, "loss": 0.0001, "lr": 1.3027360078991535e-06, "epoch": 2.3096212221195724, "percentage": 46.19, "elapsed_time": "0:25:57", "remaining_time": "0:30:14", "throughput": 5528.42, "total_tokens": 8610736} +{"current_steps": 17505, "total_steps": 37885, "loss": 0.0782, "lr": 1.302296896175329e-06, "epoch": 2.3102811138973207, "percentage": 46.21, "elapsed_time": "0:25:57", "remaining_time": "0:30:13", "throughput": 5529.06, "total_tokens": 8613616} +{"current_steps": 17510, "total_steps": 37885, "loss": 0.0256, "lr": 1.3018577202911774e-06, "epoch": 2.3109410056750694, "percentage": 46.22, "elapsed_time": "0:25:58", "remaining_time": "0:30:13", "throughput": 5529.44, "total_tokens": 8616048} +{"current_steps": 17515, "total_steps": 37885, "loss": 0.1737, "lr": 1.3014184803399104e-06, "epoch": 2.3116008974528177, "percentage": 46.23, "elapsed_time": "0:25:58", "remaining_time": "0:30:12", "throughput": 5529.66, "total_tokens": 8618224} +{"current_steps": 17520, "total_steps": 37885, "loss": 0.0467, "lr": 1.3009791764147537e-06, "epoch": 2.312260789230566, "percentage": 46.25, "elapsed_time": "0:25:58", "remaining_time": "0:30:12", "throughput": 5530.1, "total_tokens": 8620784} +{"current_steps": 17525, "total_steps": 37885, "loss": 0.0, "lr": 1.3005398086089462e-06, "epoch": 2.3129206810083147, "percentage": 46.26, "elapsed_time": "0:25:59", "remaining_time": "0:30:11", "throughput": 5530.42, "total_tokens": 8623152} +{"current_steps": 17530, "total_steps": 37885, "loss": 0.034, "lr": 1.3001003770157409e-06, "epoch": 2.313580572786063, "percentage": 46.27, "elapsed_time": "0:25:59", "remaining_time": "0:30:10", "throughput": 5530.72, "total_tokens": 8625456} +{"current_steps": 17535, "total_steps": 37885, "loss": 0.0005, "lr": 1.2996608817284033e-06, "epoch": 2.3142404645638117, "percentage": 46.28, "elapsed_time": "0:25:59", "remaining_time": "0:30:10", "throughput": 5531.14, "total_tokens": 8627952} +{"current_steps": 17540, "total_steps": 37885, "loss": 0.0003, "lr": 1.2992213228402142e-06, "epoch": 2.31490035634156, "percentage": 46.3, "elapsed_time": "0:26:00", "remaining_time": "0:30:09", "throughput": 5531.68, "total_tokens": 8630640} +{"current_steps": 17545, "total_steps": 37885, "loss": 0.0008, "lr": 1.2987817004444654e-06, "epoch": 2.3155602481193083, "percentage": 46.31, "elapsed_time": "0:26:00", "remaining_time": "0:30:09", "throughput": 5532.33, "total_tokens": 8633520} +{"current_steps": 17550, "total_steps": 37885, "loss": 0.0799, "lr": 1.2983420146344648e-06, "epoch": 2.316220139897057, "percentage": 46.32, "elapsed_time": "0:26:00", "remaining_time": "0:30:08", "throughput": 5532.84, "total_tokens": 8636208} +{"current_steps": 17555, "total_steps": 37885, "loss": 0.0427, "lr": 1.297902265503532e-06, "epoch": 2.3168800316748053, "percentage": 46.34, "elapsed_time": "0:26:01", "remaining_time": "0:30:08", "throughput": 5533.12, "total_tokens": 8638512} +{"current_steps": 17560, "total_steps": 37885, "loss": 0.1341, "lr": 1.2974624531450003e-06, "epoch": 2.3175399234525536, "percentage": 46.35, "elapsed_time": "0:26:01", "remaining_time": "0:30:07", "throughput": 5533.47, "total_tokens": 8640944} +{"current_steps": 17565, "total_steps": 37885, "loss": 0.1493, "lr": 1.2970225776522172e-06, "epoch": 2.3181998152303023, "percentage": 46.36, "elapsed_time": "0:26:01", "remaining_time": "0:30:06", "throughput": 5534.0, "total_tokens": 8643632} +{"current_steps": 17570, "total_steps": 37885, "loss": 0.018, "lr": 1.2965826391185425e-06, "epoch": 2.3188597070080506, "percentage": 46.38, "elapsed_time": "0:26:02", "remaining_time": "0:30:06", "throughput": 5534.36, "total_tokens": 8646064} +{"current_steps": 17575, "total_steps": 37885, "loss": 0.0023, "lr": 1.2961426376373507e-06, "epoch": 2.3195195987857993, "percentage": 46.39, "elapsed_time": "0:26:02", "remaining_time": "0:30:05", "throughput": 5534.76, "total_tokens": 8648560} +{"current_steps": 17580, "total_steps": 37885, "loss": 0.0002, "lr": 1.2957025733020285e-06, "epoch": 2.3201794905635476, "percentage": 46.4, "elapsed_time": "0:26:02", "remaining_time": "0:30:05", "throughput": 5535.17, "total_tokens": 8651056} +{"current_steps": 17585, "total_steps": 37885, "loss": 0.0002, "lr": 1.2952624462059767e-06, "epoch": 2.320839382341296, "percentage": 46.42, "elapsed_time": "0:26:03", "remaining_time": "0:30:04", "throughput": 5535.56, "total_tokens": 8653552} +{"current_steps": 17590, "total_steps": 37885, "loss": 0.0009, "lr": 1.2948222564426083e-06, "epoch": 2.3214992741190446, "percentage": 46.43, "elapsed_time": "0:26:03", "remaining_time": "0:30:04", "throughput": 5535.97, "total_tokens": 8656048} +{"current_steps": 17595, "total_steps": 37885, "loss": 0.0003, "lr": 1.2943820041053512e-06, "epoch": 2.322159165896793, "percentage": 46.44, "elapsed_time": "0:26:03", "remaining_time": "0:30:03", "throughput": 5536.25, "total_tokens": 8658352} +{"current_steps": 17600, "total_steps": 37885, "loss": 0.0001, "lr": 1.2939416892876451e-06, "epoch": 2.3228190576745416, "percentage": 46.46, "elapsed_time": "0:26:04", "remaining_time": "0:30:02", "throughput": 5536.57, "total_tokens": 8660720} +{"current_steps": 17605, "total_steps": 37885, "loss": 0.0005, "lr": 1.2935013120829443e-06, "epoch": 2.32347894945229, "percentage": 46.47, "elapsed_time": "0:26:04", "remaining_time": "0:30:02", "throughput": 5536.85, "total_tokens": 8663024} +{"current_steps": 17610, "total_steps": 37885, "loss": 0.0411, "lr": 1.2930608725847156e-06, "epoch": 2.324138841230038, "percentage": 46.48, "elapsed_time": "0:26:04", "remaining_time": "0:30:01", "throughput": 5537.2, "total_tokens": 8665392} +{"current_steps": 17615, "total_steps": 37885, "loss": 0.0001, "lr": 1.2926203708864385e-06, "epoch": 2.324798733007787, "percentage": 46.5, "elapsed_time": "0:26:05", "remaining_time": "0:30:01", "throughput": 5537.56, "total_tokens": 8667824} +{"current_steps": 17620, "total_steps": 37885, "loss": 0.1861, "lr": 1.2921798070816068e-06, "epoch": 2.325458624785535, "percentage": 46.51, "elapsed_time": "0:26:05", "remaining_time": "0:30:00", "throughput": 5538.03, "total_tokens": 8670448} +{"current_steps": 17625, "total_steps": 37885, "loss": 0.0, "lr": 1.2917391812637269e-06, "epoch": 2.326118516563284, "percentage": 46.52, "elapsed_time": "0:26:05", "remaining_time": "0:30:00", "throughput": 5538.46, "total_tokens": 8672944} +{"current_steps": 17630, "total_steps": 37885, "loss": 0.0659, "lr": 1.2912984935263183e-06, "epoch": 2.326778408341032, "percentage": 46.54, "elapsed_time": "0:26:06", "remaining_time": "0:29:59", "throughput": 5538.77, "total_tokens": 8675248} +{"current_steps": 17635, "total_steps": 37885, "loss": 0.0007, "lr": 1.290857743962914e-06, "epoch": 2.3274383001187804, "percentage": 46.55, "elapsed_time": "0:26:06", "remaining_time": "0:29:58", "throughput": 5539.15, "total_tokens": 8677680} +{"current_steps": 17640, "total_steps": 37885, "loss": 0.0005, "lr": 1.2904169326670596e-06, "epoch": 2.328098191896529, "percentage": 46.56, "elapsed_time": "0:26:06", "remaining_time": "0:29:58", "throughput": 5539.48, "total_tokens": 8680048} +{"current_steps": 17645, "total_steps": 37885, "loss": 0.0002, "lr": 1.2899760597323144e-06, "epoch": 2.3287580836742774, "percentage": 46.58, "elapsed_time": "0:26:07", "remaining_time": "0:29:57", "throughput": 5539.69, "total_tokens": 8682224} +{"current_steps": 17650, "total_steps": 37885, "loss": 0.0956, "lr": 1.2895351252522502e-06, "epoch": 2.329417975452026, "percentage": 46.59, "elapsed_time": "0:26:07", "remaining_time": "0:29:57", "throughput": 5540.12, "total_tokens": 8684784} +{"current_steps": 17655, "total_steps": 37885, "loss": 0.0431, "lr": 1.2890941293204525e-06, "epoch": 2.3300778672297744, "percentage": 46.6, "elapsed_time": "0:26:07", "remaining_time": "0:29:56", "throughput": 5540.41, "total_tokens": 8687088} +{"current_steps": 17660, "total_steps": 37885, "loss": 0.0472, "lr": 1.2886530720305193e-06, "epoch": 2.3307377590075227, "percentage": 46.61, "elapsed_time": "0:26:08", "remaining_time": "0:29:56", "throughput": 5540.61, "total_tokens": 8689264} +{"current_steps": 17665, "total_steps": 37885, "loss": 0.147, "lr": 1.2882119534760618e-06, "epoch": 2.3313976507852714, "percentage": 46.63, "elapsed_time": "0:26:08", "remaining_time": "0:29:55", "throughput": 5541.03, "total_tokens": 8691760} +{"current_steps": 17670, "total_steps": 37885, "loss": 0.0008, "lr": 1.2877707737507043e-06, "epoch": 2.3320575425630197, "percentage": 46.64, "elapsed_time": "0:26:08", "remaining_time": "0:29:54", "throughput": 5541.37, "total_tokens": 8694128} +{"current_steps": 17675, "total_steps": 37885, "loss": 0.0005, "lr": 1.2873295329480837e-06, "epoch": 2.332717434340768, "percentage": 46.65, "elapsed_time": "0:26:09", "remaining_time": "0:29:54", "throughput": 5541.82, "total_tokens": 8696688} +{"current_steps": 17680, "total_steps": 37885, "loss": 0.1152, "lr": 1.2868882311618505e-06, "epoch": 2.3333773261185167, "percentage": 46.67, "elapsed_time": "0:26:09", "remaining_time": "0:29:53", "throughput": 5542.19, "total_tokens": 8699120} +{"current_steps": 17685, "total_steps": 37885, "loss": 0.0001, "lr": 1.286446868485668e-06, "epoch": 2.334037217896265, "percentage": 46.68, "elapsed_time": "0:26:09", "remaining_time": "0:29:53", "throughput": 5542.58, "total_tokens": 8701552} +{"current_steps": 17690, "total_steps": 37885, "loss": 0.0001, "lr": 1.2860054450132116e-06, "epoch": 2.3346971096740132, "percentage": 46.69, "elapsed_time": "0:26:10", "remaining_time": "0:29:52", "throughput": 5543.01, "total_tokens": 8704048} +{"current_steps": 17695, "total_steps": 37885, "loss": 0.0014, "lr": 1.2855639608381706e-06, "epoch": 2.335357001451762, "percentage": 46.71, "elapsed_time": "0:26:10", "remaining_time": "0:29:52", "throughput": 5543.4, "total_tokens": 8706480} +{"current_steps": 17700, "total_steps": 37885, "loss": 0.0017, "lr": 1.2851224160542472e-06, "epoch": 2.3360168932295102, "percentage": 46.72, "elapsed_time": "0:26:10", "remaining_time": "0:29:51", "throughput": 5543.86, "total_tokens": 8709040} +{"current_steps": 17705, "total_steps": 37885, "loss": 0.0613, "lr": 1.2846808107551553e-06, "epoch": 2.336676785007259, "percentage": 46.73, "elapsed_time": "0:26:11", "remaining_time": "0:29:50", "throughput": 5544.25, "total_tokens": 8711472} +{"current_steps": 17710, "total_steps": 37885, "loss": 0.0097, "lr": 1.2842391450346228e-06, "epoch": 2.3373366767850072, "percentage": 46.75, "elapsed_time": "0:26:11", "remaining_time": "0:29:50", "throughput": 5544.64, "total_tokens": 8713904} +{"current_steps": 17715, "total_steps": 37885, "loss": 0.1691, "lr": 1.2837974189863902e-06, "epoch": 2.3379965685627555, "percentage": 46.76, "elapsed_time": "0:26:11", "remaining_time": "0:29:49", "throughput": 5544.92, "total_tokens": 8716144} +{"current_steps": 17720, "total_steps": 37885, "loss": 0.0004, "lr": 1.2833556327042105e-06, "epoch": 2.3386564603405042, "percentage": 46.77, "elapsed_time": "0:26:12", "remaining_time": "0:29:49", "throughput": 5545.23, "total_tokens": 8718448} +{"current_steps": 17725, "total_steps": 37885, "loss": 0.114, "lr": 1.2829137862818496e-06, "epoch": 2.3393163521182525, "percentage": 46.79, "elapsed_time": "0:26:12", "remaining_time": "0:29:48", "throughput": 5545.45, "total_tokens": 8720624} +{"current_steps": 17730, "total_steps": 37885, "loss": 0.0005, "lr": 1.2824718798130862e-06, "epoch": 2.3399762438960012, "percentage": 46.8, "elapsed_time": "0:26:12", "remaining_time": "0:29:48", "throughput": 5545.98, "total_tokens": 8723312} +{"current_steps": 17735, "total_steps": 37885, "loss": 0.1246, "lr": 1.2820299133917122e-06, "epoch": 2.3406361356737495, "percentage": 46.81, "elapsed_time": "0:26:13", "remaining_time": "0:29:47", "throughput": 5546.34, "total_tokens": 8725680} +{"current_steps": 17740, "total_steps": 37885, "loss": 0.0008, "lr": 1.281587887111531e-06, "epoch": 2.341296027451498, "percentage": 46.83, "elapsed_time": "0:26:13", "remaining_time": "0:29:46", "throughput": 5546.66, "total_tokens": 8727984} +{"current_steps": 17745, "total_steps": 37885, "loss": 0.0371, "lr": 1.28114580106636e-06, "epoch": 2.3419559192292465, "percentage": 46.84, "elapsed_time": "0:26:13", "remaining_time": "0:29:46", "throughput": 5547.04, "total_tokens": 8730416} +{"current_steps": 17750, "total_steps": 37885, "loss": 0.0004, "lr": 1.2807036553500286e-06, "epoch": 2.342615811006995, "percentage": 46.85, "elapsed_time": "0:26:14", "remaining_time": "0:29:45", "throughput": 5547.58, "total_tokens": 8733104} +{"current_steps": 17755, "total_steps": 37885, "loss": 0.0003, "lr": 1.280261450056379e-06, "epoch": 2.3432757027847435, "percentage": 46.87, "elapsed_time": "0:26:14", "remaining_time": "0:29:45", "throughput": 5548.01, "total_tokens": 8735600} +{"current_steps": 17760, "total_steps": 37885, "loss": 0.0001, "lr": 1.2798191852792662e-06, "epoch": 2.343935594562492, "percentage": 46.88, "elapsed_time": "0:26:14", "remaining_time": "0:29:44", "throughput": 5548.39, "total_tokens": 8738032} +{"current_steps": 17765, "total_steps": 37885, "loss": 0.0002, "lr": 1.2793768611125576e-06, "epoch": 2.34459548634024, "percentage": 46.89, "elapsed_time": "0:26:15", "remaining_time": "0:29:44", "throughput": 5548.76, "total_tokens": 8740464} +{"current_steps": 17770, "total_steps": 37885, "loss": 0.0731, "lr": 1.2789344776501333e-06, "epoch": 2.345255378117989, "percentage": 46.91, "elapsed_time": "0:26:15", "remaining_time": "0:29:43", "throughput": 5549.19, "total_tokens": 8742960} +{"current_steps": 17775, "total_steps": 37885, "loss": 0.0001, "lr": 1.2784920349858858e-06, "epoch": 2.345915269895737, "percentage": 46.92, "elapsed_time": "0:26:15", "remaining_time": "0:29:42", "throughput": 5549.71, "total_tokens": 8745648} +{"current_steps": 17780, "total_steps": 37885, "loss": 0.0027, "lr": 1.278049533213721e-06, "epoch": 2.346575161673486, "percentage": 46.93, "elapsed_time": "0:26:16", "remaining_time": "0:29:42", "throughput": 5550.21, "total_tokens": 8748272} +{"current_steps": 17785, "total_steps": 37885, "loss": 0.0004, "lr": 1.2776069724275557e-06, "epoch": 2.347235053451234, "percentage": 46.94, "elapsed_time": "0:26:16", "remaining_time": "0:29:41", "throughput": 5550.67, "total_tokens": 8750832} +{"current_steps": 17790, "total_steps": 37885, "loss": 0.1713, "lr": 1.277164352721321e-06, "epoch": 2.3478949452289823, "percentage": 46.96, "elapsed_time": "0:26:16", "remaining_time": "0:29:41", "throughput": 5551.02, "total_tokens": 8753200} +{"current_steps": 17795, "total_steps": 37885, "loss": 0.0009, "lr": 1.27672167418896e-06, "epoch": 2.348554837006731, "percentage": 46.97, "elapsed_time": "0:26:17", "remaining_time": "0:29:40", "throughput": 5551.52, "total_tokens": 8755824} +{"current_steps": 17800, "total_steps": 37885, "loss": 0.0133, "lr": 1.276278936924427e-06, "epoch": 2.3492147287844793, "percentage": 46.98, "elapsed_time": "0:26:17", "remaining_time": "0:29:40", "throughput": 5551.82, "total_tokens": 8758128} +{"current_steps": 17805, "total_steps": 37885, "loss": 0.0009, "lr": 1.2758361410216902e-06, "epoch": 2.3498746205622276, "percentage": 47.0, "elapsed_time": "0:26:17", "remaining_time": "0:29:39", "throughput": 5552.21, "total_tokens": 8760624} +{"current_steps": 17810, "total_steps": 37885, "loss": 0.0004, "lr": 1.2753932865747302e-06, "epoch": 2.3505345123399763, "percentage": 47.01, "elapsed_time": "0:26:18", "remaining_time": "0:29:38", "throughput": 5552.48, "total_tokens": 8762864} +{"current_steps": 17815, "total_steps": 37885, "loss": 0.0598, "lr": 1.2749503736775395e-06, "epoch": 2.3511944041177246, "percentage": 47.02, "elapsed_time": "0:26:18", "remaining_time": "0:29:38", "throughput": 5552.93, "total_tokens": 8765424} +{"current_steps": 17820, "total_steps": 37885, "loss": 0.0, "lr": 1.2745074024241227e-06, "epoch": 2.351854295895473, "percentage": 47.04, "elapsed_time": "0:26:18", "remaining_time": "0:29:37", "throughput": 5553.42, "total_tokens": 8768048} +{"current_steps": 17825, "total_steps": 37885, "loss": 0.0296, "lr": 1.2740643729084974e-06, "epoch": 2.3525141876732216, "percentage": 47.05, "elapsed_time": "0:26:19", "remaining_time": "0:29:37", "throughput": 5553.91, "total_tokens": 8770672} +{"current_steps": 17830, "total_steps": 37885, "loss": 0.0406, "lr": 1.273621285224694e-06, "epoch": 2.35317407945097, "percentage": 47.06, "elapsed_time": "0:26:19", "remaining_time": "0:29:36", "throughput": 5554.49, "total_tokens": 8773424} +{"current_steps": 17835, "total_steps": 37885, "loss": 0.0001, "lr": 1.2731781394667538e-06, "epoch": 2.3538339712287186, "percentage": 47.08, "elapsed_time": "0:26:19", "remaining_time": "0:29:36", "throughput": 5554.84, "total_tokens": 8775792} +{"current_steps": 17840, "total_steps": 37885, "loss": 0.0003, "lr": 1.2727349357287322e-06, "epoch": 2.354493863006467, "percentage": 47.09, "elapsed_time": "0:26:20", "remaining_time": "0:29:35", "throughput": 5555.27, "total_tokens": 8778288} +{"current_steps": 17845, "total_steps": 37885, "loss": 0.0001, "lr": 1.2722916741046951e-06, "epoch": 2.355153754784215, "percentage": 47.1, "elapsed_time": "0:26:20", "remaining_time": "0:29:34", "throughput": 5555.73, "total_tokens": 8780848} +{"current_steps": 17850, "total_steps": 37885, "loss": 0.0007, "lr": 1.2718483546887222e-06, "epoch": 2.355813646561964, "percentage": 47.12, "elapsed_time": "0:26:20", "remaining_time": "0:29:34", "throughput": 5556.14, "total_tokens": 8783344} +{"current_steps": 17855, "total_steps": 37885, "loss": 0.0002, "lr": 1.2714049775749043e-06, "epoch": 2.356473538339712, "percentage": 47.13, "elapsed_time": "0:26:21", "remaining_time": "0:29:33", "throughput": 5556.53, "total_tokens": 8785776} +{"current_steps": 17860, "total_steps": 37885, "loss": 0.1, "lr": 1.2709615428573454e-06, "epoch": 2.357133430117461, "percentage": 47.14, "elapsed_time": "0:26:21", "remaining_time": "0:29:33", "throughput": 5556.76, "total_tokens": 8787952} +{"current_steps": 17865, "total_steps": 37885, "loss": 0.2573, "lr": 1.2705180506301614e-06, "epoch": 2.357793321895209, "percentage": 47.16, "elapsed_time": "0:26:21", "remaining_time": "0:29:32", "throughput": 5557.21, "total_tokens": 8790512} +{"current_steps": 17870, "total_steps": 37885, "loss": 0.0, "lr": 1.2700745009874799e-06, "epoch": 2.3584532136729575, "percentage": 47.17, "elapsed_time": "0:26:22", "remaining_time": "0:29:32", "throughput": 5557.51, "total_tokens": 8792816} +{"current_steps": 17875, "total_steps": 37885, "loss": 0.0752, "lr": 1.2696308940234414e-06, "epoch": 2.359113105450706, "percentage": 47.18, "elapsed_time": "0:26:22", "remaining_time": "0:29:31", "throughput": 5557.85, "total_tokens": 8795184} +{"current_steps": 17880, "total_steps": 37885, "loss": 0.1042, "lr": 1.2691872298321978e-06, "epoch": 2.3597729972284545, "percentage": 47.2, "elapsed_time": "0:26:22", "remaining_time": "0:29:30", "throughput": 5558.35, "total_tokens": 8797808} +{"current_steps": 17885, "total_steps": 37885, "loss": 0.0013, "lr": 1.2687435085079143e-06, "epoch": 2.360432889006203, "percentage": 47.21, "elapsed_time": "0:26:23", "remaining_time": "0:29:30", "throughput": 5558.82, "total_tokens": 8800368} +{"current_steps": 17890, "total_steps": 37885, "loss": 0.2316, "lr": 1.2682997301447671e-06, "epoch": 2.3610927807839515, "percentage": 47.22, "elapsed_time": "0:26:23", "remaining_time": "0:29:29", "throughput": 5559.31, "total_tokens": 8802992} +{"current_steps": 17895, "total_steps": 37885, "loss": 0.0001, "lr": 1.267855894836945e-06, "epoch": 2.3617526725616997, "percentage": 47.24, "elapsed_time": "0:26:23", "remaining_time": "0:29:29", "throughput": 5559.87, "total_tokens": 8805744} +{"current_steps": 17900, "total_steps": 37885, "loss": 0.0021, "lr": 1.267412002678649e-06, "epoch": 2.3624125643394485, "percentage": 47.25, "elapsed_time": "0:26:24", "remaining_time": "0:29:28", "throughput": 5560.15, "total_tokens": 8807984} +{"current_steps": 17905, "total_steps": 37885, "loss": 0.0008, "lr": 1.2669680537640916e-06, "epoch": 2.3630724561171967, "percentage": 47.26, "elapsed_time": "0:26:24", "remaining_time": "0:29:28", "throughput": 5560.57, "total_tokens": 8810480} +{"current_steps": 17910, "total_steps": 37885, "loss": 0.001, "lr": 1.2665240481874986e-06, "epoch": 2.3637323478949455, "percentage": 47.27, "elapsed_time": "0:26:24", "remaining_time": "0:29:27", "throughput": 5560.87, "total_tokens": 8812784} +{"current_steps": 17915, "total_steps": 37885, "loss": 0.0002, "lr": 1.266079986043106e-06, "epoch": 2.3643922396726937, "percentage": 47.29, "elapsed_time": "0:26:25", "remaining_time": "0:29:26", "throughput": 5561.32, "total_tokens": 8815344} +{"current_steps": 17920, "total_steps": 37885, "loss": 0.1032, "lr": 1.2656358674251633e-06, "epoch": 2.365052131450442, "percentage": 47.3, "elapsed_time": "0:26:25", "remaining_time": "0:29:26", "throughput": 5561.71, "total_tokens": 8817776} +{"current_steps": 17925, "total_steps": 37885, "loss": 0.0633, "lr": 1.2651916924279311e-06, "epoch": 2.3657120232281907, "percentage": 47.31, "elapsed_time": "0:26:25", "remaining_time": "0:29:25", "throughput": 5562.23, "total_tokens": 8820464} +{"current_steps": 17930, "total_steps": 37885, "loss": 0.1189, "lr": 1.2647474611456827e-06, "epoch": 2.366371915005939, "percentage": 47.33, "elapsed_time": "0:26:26", "remaining_time": "0:29:25", "throughput": 5562.84, "total_tokens": 8823280} +{"current_steps": 17935, "total_steps": 37885, "loss": 0.127, "lr": 1.2643031736727029e-06, "epoch": 2.3670318067836873, "percentage": 47.34, "elapsed_time": "0:26:26", "remaining_time": "0:29:24", "throughput": 5563.26, "total_tokens": 8825776} +{"current_steps": 17940, "total_steps": 37885, "loss": 0.1263, "lr": 1.2638588301032883e-06, "epoch": 2.367691698561436, "percentage": 47.35, "elapsed_time": "0:26:26", "remaining_time": "0:29:24", "throughput": 5563.53, "total_tokens": 8828016} +{"current_steps": 17945, "total_steps": 37885, "loss": 0.0705, "lr": 1.2634144305317479e-06, "epoch": 2.3683515903391843, "percentage": 47.37, "elapsed_time": "0:26:27", "remaining_time": "0:29:23", "throughput": 5563.76, "total_tokens": 8830192} +{"current_steps": 17950, "total_steps": 37885, "loss": 0.0828, "lr": 1.2629699750524017e-06, "epoch": 2.3690114821169326, "percentage": 47.38, "elapsed_time": "0:26:27", "remaining_time": "0:29:22", "throughput": 5564.14, "total_tokens": 8832624} +{"current_steps": 17955, "total_steps": 37885, "loss": 0.0549, "lr": 1.2625254637595829e-06, "epoch": 2.3696713738946813, "percentage": 47.39, "elapsed_time": "0:26:27", "remaining_time": "0:29:22", "throughput": 5564.63, "total_tokens": 8835248} +{"current_steps": 17960, "total_steps": 37885, "loss": 0.0008, "lr": 1.2620808967476352e-06, "epoch": 2.3703312656724296, "percentage": 47.41, "elapsed_time": "0:26:28", "remaining_time": "0:29:21", "throughput": 5565.19, "total_tokens": 8838000} +{"current_steps": 17965, "total_steps": 37885, "loss": 0.0022, "lr": 1.2616362741109154e-06, "epoch": 2.3709911574501783, "percentage": 47.42, "elapsed_time": "0:26:28", "remaining_time": "0:29:21", "throughput": 5565.71, "total_tokens": 8840688} +{"current_steps": 17970, "total_steps": 37885, "loss": 0.1106, "lr": 1.2611915959437908e-06, "epoch": 2.3716510492279266, "percentage": 47.43, "elapsed_time": "0:26:28", "remaining_time": "0:29:20", "throughput": 5566.09, "total_tokens": 8843120} +{"current_steps": 17975, "total_steps": 37885, "loss": 0.0799, "lr": 1.2607468623406415e-06, "epoch": 2.372310941005675, "percentage": 47.45, "elapsed_time": "0:26:29", "remaining_time": "0:29:20", "throughput": 5566.51, "total_tokens": 8845616} +{"current_steps": 17980, "total_steps": 37885, "loss": 0.002, "lr": 1.2603020733958588e-06, "epoch": 2.3729708327834236, "percentage": 47.46, "elapsed_time": "0:26:29", "remaining_time": "0:29:19", "throughput": 5566.78, "total_tokens": 8847856} +{"current_steps": 17985, "total_steps": 37885, "loss": 0.0003, "lr": 1.2598572292038459e-06, "epoch": 2.373630724561172, "percentage": 47.47, "elapsed_time": "0:26:29", "remaining_time": "0:29:19", "throughput": 5567.28, "total_tokens": 8850480} +{"current_steps": 17990, "total_steps": 37885, "loss": 0.0006, "lr": 1.2594123298590177e-06, "epoch": 2.3742906163389206, "percentage": 47.49, "elapsed_time": "0:26:30", "remaining_time": "0:29:18", "throughput": 5567.84, "total_tokens": 8853232} +{"current_steps": 17995, "total_steps": 37885, "loss": 0.0675, "lr": 1.2589673754558014e-06, "epoch": 2.374950508116669, "percentage": 47.5, "elapsed_time": "0:26:30", "remaining_time": "0:29:17", "throughput": 5568.21, "total_tokens": 8855664} +{"current_steps": 18000, "total_steps": 37885, "loss": 0.0001, "lr": 1.2585223660886347e-06, "epoch": 2.375610399894417, "percentage": 47.51, "elapsed_time": "0:26:30", "remaining_time": "0:29:17", "throughput": 5568.62, "total_tokens": 8858160} +{"current_steps": 18005, "total_steps": 37885, "loss": 0.0011, "lr": 1.258077301851968e-06, "epoch": 2.376270291672166, "percentage": 47.53, "elapsed_time": "0:26:31", "remaining_time": "0:29:16", "throughput": 5568.93, "total_tokens": 8860464} +{"current_steps": 18010, "total_steps": 37885, "loss": 0.0613, "lr": 1.2576321828402627e-06, "epoch": 2.376930183449914, "percentage": 47.54, "elapsed_time": "0:26:31", "remaining_time": "0:29:16", "throughput": 5569.32, "total_tokens": 8862896} +{"current_steps": 18015, "total_steps": 37885, "loss": 0.0488, "lr": 1.2571870091479921e-06, "epoch": 2.377590075227663, "percentage": 47.55, "elapsed_time": "0:26:31", "remaining_time": "0:29:15", "throughput": 5569.65, "total_tokens": 8865264} +{"current_steps": 18020, "total_steps": 37885, "loss": 0.0703, "lr": 1.2567417808696416e-06, "epoch": 2.378249967005411, "percentage": 47.56, "elapsed_time": "0:26:32", "remaining_time": "0:29:15", "throughput": 5570.06, "total_tokens": 8867760} +{"current_steps": 18025, "total_steps": 37885, "loss": 0.0002, "lr": 1.2562964980997072e-06, "epoch": 2.3789098587831594, "percentage": 47.58, "elapsed_time": "0:26:32", "remaining_time": "0:29:14", "throughput": 5570.58, "total_tokens": 8870448} +{"current_steps": 18030, "total_steps": 37885, "loss": 0.046, "lr": 1.2558511609326968e-06, "epoch": 2.379569750560908, "percentage": 47.59, "elapsed_time": "0:26:32", "remaining_time": "0:29:13", "throughput": 5571.1, "total_tokens": 8873136} +{"current_steps": 18035, "total_steps": 37885, "loss": 0.0041, "lr": 1.2554057694631302e-06, "epoch": 2.3802296423386564, "percentage": 47.6, "elapsed_time": "0:26:33", "remaining_time": "0:29:13", "throughput": 5571.51, "total_tokens": 8875632} +{"current_steps": 18040, "total_steps": 37885, "loss": 0.0006, "lr": 1.2549603237855386e-06, "epoch": 2.380889534116405, "percentage": 47.62, "elapsed_time": "0:26:33", "remaining_time": "0:29:12", "throughput": 5572.08, "total_tokens": 8878384} +{"current_steps": 18045, "total_steps": 37885, "loss": 0.0615, "lr": 1.2545148239944644e-06, "epoch": 2.3815494258941534, "percentage": 47.63, "elapsed_time": "0:26:33", "remaining_time": "0:29:12", "throughput": 5572.54, "total_tokens": 8880944} +{"current_steps": 18050, "total_steps": 37885, "loss": 0.0002, "lr": 1.2540692701844625e-06, "epoch": 2.3822093176719017, "percentage": 47.64, "elapsed_time": "0:26:34", "remaining_time": "0:29:11", "throughput": 5573.03, "total_tokens": 8883568} +{"current_steps": 18055, "total_steps": 37885, "loss": 0.052, "lr": 1.253623662450097e-06, "epoch": 2.3828692094496504, "percentage": 47.66, "elapsed_time": "0:26:34", "remaining_time": "0:29:11", "throughput": 5573.43, "total_tokens": 8886064} +{"current_steps": 18060, "total_steps": 37885, "loss": 0.0003, "lr": 1.2531780008859464e-06, "epoch": 2.3835291012273987, "percentage": 47.67, "elapsed_time": "0:26:34", "remaining_time": "0:29:10", "throughput": 5573.99, "total_tokens": 8888816} +{"current_steps": 18065, "total_steps": 37885, "loss": 0.0738, "lr": 1.252732285586598e-06, "epoch": 2.384188993005147, "percentage": 47.68, "elapsed_time": "0:26:35", "remaining_time": "0:29:09", "throughput": 5574.37, "total_tokens": 8891248} +{"current_steps": 18070, "total_steps": 37885, "loss": 0.0487, "lr": 1.2522865166466528e-06, "epoch": 2.3848488847828957, "percentage": 47.7, "elapsed_time": "0:26:35", "remaining_time": "0:29:09", "throughput": 5574.82, "total_tokens": 8893808} +{"current_steps": 18075, "total_steps": 37885, "loss": 0.0383, "lr": 1.2518406941607207e-06, "epoch": 2.385508776560644, "percentage": 47.71, "elapsed_time": "0:26:35", "remaining_time": "0:29:08", "throughput": 5575.24, "total_tokens": 8896304} +{"current_steps": 18080, "total_steps": 37885, "loss": 0.0004, "lr": 1.2513948182234253e-06, "epoch": 2.3861686683383927, "percentage": 47.72, "elapsed_time": "0:26:36", "remaining_time": "0:29:08", "throughput": 5575.57, "total_tokens": 8898672} +{"current_steps": 18085, "total_steps": 37885, "loss": 0.0002, "lr": 1.2509488889293998e-06, "epoch": 2.386828560116141, "percentage": 47.74, "elapsed_time": "0:26:36", "remaining_time": "0:29:07", "throughput": 5575.99, "total_tokens": 8901168} +{"current_steps": 18090, "total_steps": 37885, "loss": 0.0005, "lr": 1.2505029063732898e-06, "epoch": 2.3874884518938893, "percentage": 47.75, "elapsed_time": "0:26:36", "remaining_time": "0:29:07", "throughput": 5576.37, "total_tokens": 8903600} +{"current_steps": 18095, "total_steps": 37885, "loss": 0.0954, "lr": 1.2500568706497526e-06, "epoch": 2.388148343671638, "percentage": 47.76, "elapsed_time": "0:26:36", "remaining_time": "0:29:06", "throughput": 5576.75, "total_tokens": 8906032} +{"current_steps": 18100, "total_steps": 37885, "loss": 0.0035, "lr": 1.2496107818534548e-06, "epoch": 2.3888082354493863, "percentage": 47.78, "elapsed_time": "0:26:37", "remaining_time": "0:29:06", "throughput": 5577.08, "total_tokens": 8908400} +{"current_steps": 18105, "total_steps": 37885, "loss": 0.0573, "lr": 1.2491646400790766e-06, "epoch": 2.3894681272271345, "percentage": 47.79, "elapsed_time": "0:26:37", "remaining_time": "0:29:05", "throughput": 5577.46, "total_tokens": 8910832} +{"current_steps": 18110, "total_steps": 37885, "loss": 0.0613, "lr": 1.2487184454213073e-06, "epoch": 2.3901280190048833, "percentage": 47.8, "elapsed_time": "0:26:37", "remaining_time": "0:29:04", "throughput": 5577.8, "total_tokens": 8913200} +{"current_steps": 18115, "total_steps": 37885, "loss": 0.0002, "lr": 1.2482721979748494e-06, "epoch": 2.3907879107826315, "percentage": 47.82, "elapsed_time": "0:26:38", "remaining_time": "0:29:04", "throughput": 5578.13, "total_tokens": 8915568} +{"current_steps": 18120, "total_steps": 37885, "loss": 0.0691, "lr": 1.2478258978344149e-06, "epoch": 2.3914478025603803, "percentage": 47.83, "elapsed_time": "0:26:38", "remaining_time": "0:29:03", "throughput": 5578.32, "total_tokens": 8917680} +{"current_steps": 18125, "total_steps": 37885, "loss": 0.0109, "lr": 1.2473795450947287e-06, "epoch": 2.3921076943381285, "percentage": 47.84, "elapsed_time": "0:26:38", "remaining_time": "0:29:03", "throughput": 5578.69, "total_tokens": 8920112} +{"current_steps": 18130, "total_steps": 37885, "loss": 0.0002, "lr": 1.2469331398505254e-06, "epoch": 2.392767586115877, "percentage": 47.86, "elapsed_time": "0:26:39", "remaining_time": "0:29:02", "throughput": 5579.08, "total_tokens": 8922544} +{"current_steps": 18135, "total_steps": 37885, "loss": 0.094, "lr": 1.246486682196551e-06, "epoch": 2.3934274778936255, "percentage": 47.87, "elapsed_time": "0:26:39", "remaining_time": "0:29:02", "throughput": 5579.49, "total_tokens": 8925040} +{"current_steps": 18140, "total_steps": 37885, "loss": 0.0004, "lr": 1.2460401722275633e-06, "epoch": 2.394087369671374, "percentage": 47.88, "elapsed_time": "0:26:39", "remaining_time": "0:29:01", "throughput": 5579.83, "total_tokens": 8927408} +{"current_steps": 18145, "total_steps": 37885, "loss": 0.1117, "lr": 1.2455936100383309e-06, "epoch": 2.3947472614491225, "percentage": 47.89, "elapsed_time": "0:26:40", "remaining_time": "0:29:00", "throughput": 5580.2, "total_tokens": 8929840} +{"current_steps": 18150, "total_steps": 37885, "loss": 0.0003, "lr": 1.2451469957236334e-06, "epoch": 2.395407153226871, "percentage": 47.91, "elapsed_time": "0:26:40", "remaining_time": "0:29:00", "throughput": 5580.56, "total_tokens": 8932272} +{"current_steps": 18155, "total_steps": 37885, "loss": 0.0591, "lr": 1.2447003293782607e-06, "epoch": 2.396067045004619, "percentage": 47.92, "elapsed_time": "0:26:40", "remaining_time": "0:28:59", "throughput": 5580.91, "total_tokens": 8934640} +{"current_steps": 18160, "total_steps": 37885, "loss": 0.0025, "lr": 1.2442536110970152e-06, "epoch": 2.396726936782368, "percentage": 47.93, "elapsed_time": "0:26:41", "remaining_time": "0:28:59", "throughput": 5581.35, "total_tokens": 8937200} +{"current_steps": 18165, "total_steps": 37885, "loss": 0.0002, "lr": 1.2438068409747097e-06, "epoch": 2.397386828560116, "percentage": 47.95, "elapsed_time": "0:26:41", "remaining_time": "0:28:58", "throughput": 5581.69, "total_tokens": 8939568} +{"current_steps": 18170, "total_steps": 37885, "loss": 0.0413, "lr": 1.2433600191061677e-06, "epoch": 2.398046720337865, "percentage": 47.96, "elapsed_time": "0:26:41", "remaining_time": "0:28:58", "throughput": 5582.06, "total_tokens": 8942000} +{"current_steps": 18175, "total_steps": 37885, "loss": 0.0088, "lr": 1.242913145586224e-06, "epoch": 2.398706612115613, "percentage": 47.97, "elapsed_time": "0:26:42", "remaining_time": "0:28:57", "throughput": 5582.58, "total_tokens": 8944688} +{"current_steps": 18180, "total_steps": 37885, "loss": 0.0345, "lr": 1.2424662205097241e-06, "epoch": 2.3993665038933614, "percentage": 47.99, "elapsed_time": "0:26:42", "remaining_time": "0:28:57", "throughput": 5583.05, "total_tokens": 8947312} +{"current_steps": 18185, "total_steps": 37885, "loss": 0.0002, "lr": 1.2420192439715247e-06, "epoch": 2.40002639567111, "percentage": 48.0, "elapsed_time": "0:26:42", "remaining_time": "0:28:56", "throughput": 5583.45, "total_tokens": 8949808} +{"current_steps": 18190, "total_steps": 37885, "loss": 0.0003, "lr": 1.2415722160664933e-06, "epoch": 2.4006862874488584, "percentage": 48.01, "elapsed_time": "0:26:43", "remaining_time": "0:28:55", "throughput": 5583.76, "total_tokens": 8952112} +{"current_steps": 18195, "total_steps": 37885, "loss": 0.1239, "lr": 1.2411251368895085e-06, "epoch": 2.4013461792266066, "percentage": 48.03, "elapsed_time": "0:26:43", "remaining_time": "0:28:55", "throughput": 5584.28, "total_tokens": 8954800} +{"current_steps": 18200, "total_steps": 37885, "loss": 0.0001, "lr": 1.2406780065354592e-06, "epoch": 2.4020060710043554, "percentage": 48.04, "elapsed_time": "0:26:43", "remaining_time": "0:28:54", "throughput": 5584.73, "total_tokens": 8957360} +{"current_steps": 18205, "total_steps": 37885, "loss": 0.0001, "lr": 1.240230825099246e-06, "epoch": 2.4026659627821036, "percentage": 48.05, "elapsed_time": "0:26:44", "remaining_time": "0:28:54", "throughput": 5585.28, "total_tokens": 8960112} +{"current_steps": 18210, "total_steps": 37885, "loss": 0.0006, "lr": 1.2397835926757798e-06, "epoch": 2.4033258545598524, "percentage": 48.07, "elapsed_time": "0:26:44", "remaining_time": "0:28:53", "throughput": 5585.69, "total_tokens": 8962608} +{"current_steps": 18215, "total_steps": 37885, "loss": 0.133, "lr": 1.2393363093599823e-06, "epoch": 2.4039857463376006, "percentage": 48.08, "elapsed_time": "0:26:44", "remaining_time": "0:28:53", "throughput": 5586.06, "total_tokens": 8965040} +{"current_steps": 18220, "total_steps": 37885, "loss": 0.0659, "lr": 1.2388889752467867e-06, "epoch": 2.404645638115349, "percentage": 48.09, "elapsed_time": "0:26:45", "remaining_time": "0:28:52", "throughput": 5586.29, "total_tokens": 8967216} +{"current_steps": 18225, "total_steps": 37885, "loss": 0.0, "lr": 1.2384415904311357e-06, "epoch": 2.4053055298930976, "percentage": 48.11, "elapsed_time": "0:26:45", "remaining_time": "0:28:51", "throughput": 5586.69, "total_tokens": 8969712} +{"current_steps": 18230, "total_steps": 37885, "loss": 0.0001, "lr": 1.2379941550079836e-06, "epoch": 2.405965421670846, "percentage": 48.12, "elapsed_time": "0:26:45", "remaining_time": "0:28:51", "throughput": 5587.11, "total_tokens": 8972208} +{"current_steps": 18235, "total_steps": 37885, "loss": 0.105, "lr": 1.2375466690722957e-06, "epoch": 2.406625313448594, "percentage": 48.13, "elapsed_time": "0:26:46", "remaining_time": "0:28:50", "throughput": 5587.55, "total_tokens": 8974768} +{"current_steps": 18240, "total_steps": 37885, "loss": 0.0736, "lr": 1.2370991327190473e-06, "epoch": 2.407285205226343, "percentage": 48.15, "elapsed_time": "0:26:46", "remaining_time": "0:28:50", "throughput": 5587.92, "total_tokens": 8977200} +{"current_steps": 18245, "total_steps": 37885, "loss": 0.0, "lr": 1.2366515460432255e-06, "epoch": 2.407945097004091, "percentage": 48.16, "elapsed_time": "0:26:46", "remaining_time": "0:28:49", "throughput": 5588.25, "total_tokens": 8979568} +{"current_steps": 18250, "total_steps": 37885, "loss": 0.0907, "lr": 1.2362039091398259e-06, "epoch": 2.40860498878184, "percentage": 48.17, "elapsed_time": "0:26:47", "remaining_time": "0:28:49", "throughput": 5588.58, "total_tokens": 8981936} +{"current_steps": 18255, "total_steps": 37885, "loss": 0.1141, "lr": 1.235756222103858e-06, "epoch": 2.409264880559588, "percentage": 48.19, "elapsed_time": "0:26:47", "remaining_time": "0:28:48", "throughput": 5588.95, "total_tokens": 8984368} +{"current_steps": 18260, "total_steps": 37885, "loss": 0.1377, "lr": 1.2353084850303386e-06, "epoch": 2.4099247723373365, "percentage": 48.2, "elapsed_time": "0:26:47", "remaining_time": "0:28:48", "throughput": 5589.29, "total_tokens": 8986736} +{"current_steps": 18265, "total_steps": 37885, "loss": 0.1191, "lr": 1.2348606980142973e-06, "epoch": 2.410584664115085, "percentage": 48.21, "elapsed_time": "0:26:48", "remaining_time": "0:28:47", "throughput": 5589.39, "total_tokens": 8988720} +{"current_steps": 18270, "total_steps": 37885, "loss": 0.0002, "lr": 1.2344128611507733e-06, "epoch": 2.4112445558928335, "percentage": 48.22, "elapsed_time": "0:26:48", "remaining_time": "0:28:46", "throughput": 5589.66, "total_tokens": 8990960} +{"current_steps": 18275, "total_steps": 37885, "loss": 0.0002, "lr": 1.2339649745348176e-06, "epoch": 2.411904447670582, "percentage": 48.24, "elapsed_time": "0:26:48", "remaining_time": "0:28:46", "throughput": 5589.99, "total_tokens": 8993328} +{"current_steps": 18280, "total_steps": 37885, "loss": 0.0004, "lr": 1.23351703826149e-06, "epoch": 2.4125643394483305, "percentage": 48.25, "elapsed_time": "0:26:49", "remaining_time": "0:28:45", "throughput": 5590.29, "total_tokens": 8995632} +{"current_steps": 18285, "total_steps": 37885, "loss": 0.046, "lr": 1.2330690524258618e-06, "epoch": 2.4132242312260788, "percentage": 48.26, "elapsed_time": "0:26:49", "remaining_time": "0:28:45", "throughput": 5590.77, "total_tokens": 8998256} +{"current_steps": 18290, "total_steps": 37885, "loss": 0.1114, "lr": 1.2326210171230152e-06, "epoch": 2.4138841230038275, "percentage": 48.28, "elapsed_time": "0:26:49", "remaining_time": "0:28:44", "throughput": 5590.95, "total_tokens": 9000368} +{"current_steps": 18295, "total_steps": 37885, "loss": 0.0758, "lr": 1.2321729324480422e-06, "epoch": 2.4145440147815758, "percentage": 48.29, "elapsed_time": "0:26:50", "remaining_time": "0:28:44", "throughput": 5591.31, "total_tokens": 9002800} +{"current_steps": 18300, "total_steps": 37885, "loss": 0.0675, "lr": 1.2317247984960455e-06, "epoch": 2.4152039065593245, "percentage": 48.3, "elapsed_time": "0:26:50", "remaining_time": "0:28:43", "throughput": 5591.68, "total_tokens": 9005232} +{"current_steps": 18305, "total_steps": 37885, "loss": 0.0001, "lr": 1.2312766153621383e-06, "epoch": 2.4158637983370728, "percentage": 48.32, "elapsed_time": "0:26:50", "remaining_time": "0:28:42", "throughput": 5592.2, "total_tokens": 9007920} +{"current_steps": 18310, "total_steps": 37885, "loss": 0.0005, "lr": 1.2308283831414444e-06, "epoch": 2.416523690114821, "percentage": 48.33, "elapsed_time": "0:26:51", "remaining_time": "0:28:42", "throughput": 5592.62, "total_tokens": 9010416} +{"current_steps": 18315, "total_steps": 37885, "loss": 0.0003, "lr": 1.2303801019290978e-06, "epoch": 2.4171835818925698, "percentage": 48.34, "elapsed_time": "0:26:51", "remaining_time": "0:28:41", "throughput": 5593.16, "total_tokens": 9013168} +{"current_steps": 18320, "total_steps": 37885, "loss": 0.0476, "lr": 1.2299317718202424e-06, "epoch": 2.417843473670318, "percentage": 48.36, "elapsed_time": "0:26:51", "remaining_time": "0:28:41", "throughput": 5593.59, "total_tokens": 9015728} +{"current_steps": 18325, "total_steps": 37885, "loss": 0.0326, "lr": 1.229483392910034e-06, "epoch": 2.4185033654480668, "percentage": 48.37, "elapsed_time": "0:26:52", "remaining_time": "0:28:40", "throughput": 5594.1, "total_tokens": 9018416} +{"current_steps": 18330, "total_steps": 37885, "loss": 0.0002, "lr": 1.229034965293637e-06, "epoch": 2.419163257225815, "percentage": 48.38, "elapsed_time": "0:26:52", "remaining_time": "0:28:40", "throughput": 5594.53, "total_tokens": 9020976} +{"current_steps": 18335, "total_steps": 37885, "loss": 0.0002, "lr": 1.2285864890662272e-06, "epoch": 2.4198231490035633, "percentage": 48.4, "elapsed_time": "0:26:52", "remaining_time": "0:28:39", "throughput": 5595.09, "total_tokens": 9023728} +{"current_steps": 18340, "total_steps": 37885, "loss": 0.0007, "lr": 1.2281379643229904e-06, "epoch": 2.420483040781312, "percentage": 48.41, "elapsed_time": "0:26:53", "remaining_time": "0:28:39", "throughput": 5595.43, "total_tokens": 9026096} +{"current_steps": 18345, "total_steps": 37885, "loss": 0.0509, "lr": 1.2276893911591226e-06, "epoch": 2.4211429325590603, "percentage": 48.42, "elapsed_time": "0:26:53", "remaining_time": "0:28:38", "throughput": 5595.85, "total_tokens": 9028656} +{"current_steps": 18350, "total_steps": 37885, "loss": 0.0002, "lr": 1.2272407696698303e-06, "epoch": 2.4218028243368086, "percentage": 48.44, "elapsed_time": "0:26:53", "remaining_time": "0:28:38", "throughput": 5596.44, "total_tokens": 9031472} +{"current_steps": 18355, "total_steps": 37885, "loss": 0.0005, "lr": 1.2267920999503302e-06, "epoch": 2.4224627161145573, "percentage": 48.45, "elapsed_time": "0:26:54", "remaining_time": "0:28:37", "throughput": 5597.05, "total_tokens": 9034352} +{"current_steps": 18360, "total_steps": 37885, "loss": 0.0001, "lr": 1.2263433820958494e-06, "epoch": 2.4231226078923056, "percentage": 48.46, "elapsed_time": "0:26:54", "remaining_time": "0:28:36", "throughput": 5597.39, "total_tokens": 9036720} +{"current_steps": 18365, "total_steps": 37885, "loss": 0.0003, "lr": 1.2258946162016247e-06, "epoch": 2.423782499670054, "percentage": 48.48, "elapsed_time": "0:26:54", "remaining_time": "0:28:36", "throughput": 5597.8, "total_tokens": 9039216} +{"current_steps": 18370, "total_steps": 37885, "loss": 0.0595, "lr": 1.2254458023629035e-06, "epoch": 2.4244423914478026, "percentage": 48.49, "elapsed_time": "0:26:55", "remaining_time": "0:28:35", "throughput": 5598.12, "total_tokens": 9041584} +{"current_steps": 18375, "total_steps": 37885, "loss": 0.0442, "lr": 1.2249969406749432e-06, "epoch": 2.425102283225551, "percentage": 48.5, "elapsed_time": "0:26:55", "remaining_time": "0:28:35", "throughput": 5598.41, "total_tokens": 9043888} +{"current_steps": 18380, "total_steps": 37885, "loss": 0.0003, "lr": 1.2245480312330117e-06, "epoch": 2.4257621750032996, "percentage": 48.52, "elapsed_time": "0:26:55", "remaining_time": "0:28:34", "throughput": 5598.78, "total_tokens": 9046320} +{"current_steps": 18385, "total_steps": 37885, "loss": 0.0675, "lr": 1.2240990741323867e-06, "epoch": 2.426422066781048, "percentage": 48.53, "elapsed_time": "0:26:56", "remaining_time": "0:28:34", "throughput": 5599.22, "total_tokens": 9048880} +{"current_steps": 18390, "total_steps": 37885, "loss": 0.0, "lr": 1.2236500694683555e-06, "epoch": 2.427081958558796, "percentage": 48.54, "elapsed_time": "0:26:56", "remaining_time": "0:28:33", "throughput": 5599.58, "total_tokens": 9051312} +{"current_steps": 18395, "total_steps": 37885, "loss": 0.1489, "lr": 1.223201017336217e-06, "epoch": 2.427741850336545, "percentage": 48.55, "elapsed_time": "0:26:56", "remaining_time": "0:28:33", "throughput": 5600.09, "total_tokens": 9054000} +{"current_steps": 18400, "total_steps": 37885, "loss": 0.001, "lr": 1.222751917831279e-06, "epoch": 2.428401742114293, "percentage": 48.57, "elapsed_time": "0:26:57", "remaining_time": "0:28:32", "throughput": 5600.34, "total_tokens": 9056240} +{"current_steps": 18405, "total_steps": 37885, "loss": 0.1132, "lr": 1.2223027710488591e-06, "epoch": 2.429061633892042, "percentage": 48.58, "elapsed_time": "0:26:57", "remaining_time": "0:28:31", "throughput": 5600.71, "total_tokens": 9058672} +{"current_steps": 18410, "total_steps": 37885, "loss": 0.0001, "lr": 1.221853577084286e-06, "epoch": 2.42972152566979, "percentage": 48.59, "elapsed_time": "0:26:57", "remaining_time": "0:28:31", "throughput": 5601.07, "total_tokens": 9061104} +{"current_steps": 18415, "total_steps": 37885, "loss": 0.1807, "lr": 1.221404336032898e-06, "epoch": 2.4303814174475384, "percentage": 48.61, "elapsed_time": "0:26:58", "remaining_time": "0:28:30", "throughput": 5601.29, "total_tokens": 9063280} +{"current_steps": 18420, "total_steps": 37885, "loss": 0.0442, "lr": 1.2209550479900425e-06, "epoch": 2.431041309225287, "percentage": 48.62, "elapsed_time": "0:26:58", "remaining_time": "0:28:30", "throughput": 5601.75, "total_tokens": 9065840} +{"current_steps": 18425, "total_steps": 37885, "loss": 0.0, "lr": 1.2205057130510783e-06, "epoch": 2.4317012010030354, "percentage": 48.63, "elapsed_time": "0:26:58", "remaining_time": "0:28:29", "throughput": 5601.96, "total_tokens": 9068016} +{"current_steps": 18430, "total_steps": 37885, "loss": 0.0813, "lr": 1.2200563313113732e-06, "epoch": 2.432361092780784, "percentage": 48.65, "elapsed_time": "0:26:59", "remaining_time": "0:28:29", "throughput": 5602.33, "total_tokens": 9070448} +{"current_steps": 18435, "total_steps": 37885, "loss": 0.0473, "lr": 1.2196069028663057e-06, "epoch": 2.4330209845585324, "percentage": 48.66, "elapsed_time": "0:26:59", "remaining_time": "0:28:28", "throughput": 5602.69, "total_tokens": 9072880} +{"current_steps": 18440, "total_steps": 37885, "loss": 0.0002, "lr": 1.219157427811263e-06, "epoch": 2.4336808763362807, "percentage": 48.67, "elapsed_time": "0:26:59", "remaining_time": "0:28:27", "throughput": 5603.01, "total_tokens": 9075248} +{"current_steps": 18445, "total_steps": 37885, "loss": 0.0627, "lr": 1.218707906241643e-06, "epoch": 2.4343407681140294, "percentage": 48.69, "elapsed_time": "0:27:00", "remaining_time": "0:28:27", "throughput": 5603.41, "total_tokens": 9077744} +{"current_steps": 18450, "total_steps": 37885, "loss": 0.0003, "lr": 1.2182583382528543e-06, "epoch": 2.4350006598917777, "percentage": 48.7, "elapsed_time": "0:27:00", "remaining_time": "0:28:26", "throughput": 5603.99, "total_tokens": 9080560} +{"current_steps": 18455, "total_steps": 37885, "loss": 0.0109, "lr": 1.2178087239403133e-06, "epoch": 2.4356605516695264, "percentage": 48.71, "elapsed_time": "0:27:00", "remaining_time": "0:28:26", "throughput": 5604.36, "total_tokens": 9082992} +{"current_steps": 18460, "total_steps": 37885, "loss": 0.0844, "lr": 1.2173590633994479e-06, "epoch": 2.4363204434472747, "percentage": 48.73, "elapsed_time": "0:27:01", "remaining_time": "0:28:25", "throughput": 5604.77, "total_tokens": 9085552} +{"current_steps": 18465, "total_steps": 37885, "loss": 0.0001, "lr": 1.2169093567256955e-06, "epoch": 2.436980335225023, "percentage": 48.74, "elapsed_time": "0:27:01", "remaining_time": "0:28:25", "throughput": 5604.96, "total_tokens": 9087728} +{"current_steps": 18470, "total_steps": 37885, "loss": 0.0004, "lr": 1.2164596040145028e-06, "epoch": 2.4376402270027717, "percentage": 48.75, "elapsed_time": "0:27:01", "remaining_time": "0:28:24", "throughput": 5605.21, "total_tokens": 9089968} +{"current_steps": 18475, "total_steps": 37885, "loss": 0.0011, "lr": 1.2160098053613267e-06, "epoch": 2.43830011878052, "percentage": 48.77, "elapsed_time": "0:27:02", "remaining_time": "0:28:24", "throughput": 5605.6, "total_tokens": 9092464} +{"current_steps": 18480, "total_steps": 37885, "loss": 0.0345, "lr": 1.2155599608616331e-06, "epoch": 2.4389600105582683, "percentage": 48.78, "elapsed_time": "0:27:02", "remaining_time": "0:28:23", "throughput": 5606.05, "total_tokens": 9095088} +{"current_steps": 18485, "total_steps": 37885, "loss": 0.0006, "lr": 1.2151100706108996e-06, "epoch": 2.439619902336017, "percentage": 48.79, "elapsed_time": "0:27:02", "remaining_time": "0:28:23", "throughput": 5606.51, "total_tokens": 9097712} +{"current_steps": 18490, "total_steps": 37885, "loss": 0.0643, "lr": 1.2146601347046107e-06, "epoch": 2.4402797941137653, "percentage": 48.81, "elapsed_time": "0:27:03", "remaining_time": "0:28:22", "throughput": 5606.98, "total_tokens": 9100336} +{"current_steps": 18495, "total_steps": 37885, "loss": 0.0002, "lr": 1.214210153238263e-06, "epoch": 2.4409396858915136, "percentage": 48.82, "elapsed_time": "0:27:03", "remaining_time": "0:28:21", "throughput": 5607.47, "total_tokens": 9103024} +{"current_steps": 18500, "total_steps": 37885, "loss": 0.0001, "lr": 1.2137601263073613e-06, "epoch": 2.4415995776692623, "percentage": 48.83, "elapsed_time": "0:27:03", "remaining_time": "0:28:21", "throughput": 5607.86, "total_tokens": 9105520} +{"current_steps": 18505, "total_steps": 37885, "loss": 0.0689, "lr": 1.2133100540074206e-06, "epoch": 2.4422594694470106, "percentage": 48.85, "elapsed_time": "0:27:04", "remaining_time": "0:28:20", "throughput": 5608.25, "total_tokens": 9108016} +{"current_steps": 18510, "total_steps": 37885, "loss": 0.0003, "lr": 1.2128599364339663e-06, "epoch": 2.4429193612247593, "percentage": 48.86, "elapsed_time": "0:27:04", "remaining_time": "0:28:20", "throughput": 5608.52, "total_tokens": 9110320} +{"current_steps": 18515, "total_steps": 37885, "loss": 0.0113, "lr": 1.212409773682531e-06, "epoch": 2.4435792530025076, "percentage": 48.87, "elapsed_time": "0:27:04", "remaining_time": "0:28:19", "throughput": 5608.8, "total_tokens": 9112624} +{"current_steps": 18520, "total_steps": 37885, "loss": 0.0001, "lr": 1.2119595658486599e-06, "epoch": 2.444239144780256, "percentage": 48.88, "elapsed_time": "0:27:05", "remaining_time": "0:28:19", "throughput": 5609.19, "total_tokens": 9115120} +{"current_steps": 18525, "total_steps": 37885, "loss": 0.111, "lr": 1.2115093130279055e-06, "epoch": 2.4448990365580046, "percentage": 48.9, "elapsed_time": "0:27:05", "remaining_time": "0:28:18", "throughput": 5609.6, "total_tokens": 9117680} +{"current_steps": 18530, "total_steps": 37885, "loss": 0.1346, "lr": 1.2110590153158313e-06, "epoch": 2.445558928335753, "percentage": 48.91, "elapsed_time": "0:27:05", "remaining_time": "0:28:18", "throughput": 5609.94, "total_tokens": 9120112} +{"current_steps": 18535, "total_steps": 37885, "loss": 0.0004, "lr": 1.2106086728080095e-06, "epoch": 2.4462188201135016, "percentage": 48.92, "elapsed_time": "0:27:06", "remaining_time": "0:28:17", "throughput": 5610.42, "total_tokens": 9122800} +{"current_steps": 18540, "total_steps": 37885, "loss": 0.0473, "lr": 1.2101582856000219e-06, "epoch": 2.44687871189125, "percentage": 48.94, "elapsed_time": "0:27:06", "remaining_time": "0:28:16", "throughput": 5610.6, "total_tokens": 9124976} +{"current_steps": 18545, "total_steps": 37885, "loss": 0.0337, "lr": 1.20970785378746e-06, "epoch": 2.447538603668998, "percentage": 48.95, "elapsed_time": "0:27:06", "remaining_time": "0:28:16", "throughput": 5610.97, "total_tokens": 9127472} +{"current_steps": 18550, "total_steps": 37885, "loss": 0.0005, "lr": 1.2092573774659247e-06, "epoch": 2.448198495446747, "percentage": 48.96, "elapsed_time": "0:27:07", "remaining_time": "0:28:15", "throughput": 5611.52, "total_tokens": 9130288} +{"current_steps": 18555, "total_steps": 37885, "loss": 0.0011, "lr": 1.2088068567310266e-06, "epoch": 2.448858387224495, "percentage": 48.98, "elapsed_time": "0:27:07", "remaining_time": "0:28:15", "throughput": 5611.74, "total_tokens": 9132528} +{"current_steps": 18560, "total_steps": 37885, "loss": 0.197, "lr": 1.2083562916783852e-06, "epoch": 2.449518279002244, "percentage": 48.99, "elapsed_time": "0:27:07", "remaining_time": "0:28:14", "throughput": 5612.18, "total_tokens": 9135152} +{"current_steps": 18565, "total_steps": 37885, "loss": 0.088, "lr": 1.2079056824036294e-06, "epoch": 2.450178170779992, "percentage": 49.0, "elapsed_time": "0:27:08", "remaining_time": "0:28:14", "throughput": 5612.77, "total_tokens": 9138032} +{"current_steps": 18570, "total_steps": 37885, "loss": 0.0004, "lr": 1.207455029002398e-06, "epoch": 2.4508380625577404, "percentage": 49.02, "elapsed_time": "0:27:08", "remaining_time": "0:28:13", "throughput": 5613.12, "total_tokens": 9140528} +{"current_steps": 18575, "total_steps": 37885, "loss": 0.0495, "lr": 1.207004331570339e-06, "epoch": 2.451497954335489, "percentage": 49.03, "elapsed_time": "0:27:08", "remaining_time": "0:28:13", "throughput": 5613.35, "total_tokens": 9142768} +{"current_steps": 18580, "total_steps": 37885, "loss": 0.0004, "lr": 1.2065535902031098e-06, "epoch": 2.4521578461132374, "percentage": 49.04, "elapsed_time": "0:27:09", "remaining_time": "0:28:12", "throughput": 5613.78, "total_tokens": 9145392} +{"current_steps": 18585, "total_steps": 37885, "loss": 0.1586, "lr": 1.206102804996377e-06, "epoch": 2.452817737890986, "percentage": 49.06, "elapsed_time": "0:27:09", "remaining_time": "0:28:12", "throughput": 5614.04, "total_tokens": 9147696} +{"current_steps": 18590, "total_steps": 37885, "loss": 0.0845, "lr": 1.2056519760458162e-06, "epoch": 2.4534776296687344, "percentage": 49.07, "elapsed_time": "0:27:09", "remaining_time": "0:28:11", "throughput": 5614.49, "total_tokens": 9150320} +{"current_steps": 18595, "total_steps": 37885, "loss": 0.0212, "lr": 1.2052011034471123e-06, "epoch": 2.4541375214464827, "percentage": 49.08, "elapsed_time": "0:27:10", "remaining_time": "0:28:11", "throughput": 5615.01, "total_tokens": 9153072} +{"current_steps": 18600, "total_steps": 37885, "loss": 0.0005, "lr": 1.2047501872959606e-06, "epoch": 2.4547974132242314, "percentage": 49.1, "elapsed_time": "0:27:10", "remaining_time": "0:28:10", "throughput": 5615.34, "total_tokens": 9155504} +{"current_steps": 18605, "total_steps": 37885, "loss": 0.0002, "lr": 1.204299227688064e-06, "epoch": 2.4554573050019797, "percentage": 49.11, "elapsed_time": "0:27:10", "remaining_time": "0:28:09", "throughput": 5615.75, "total_tokens": 9158064} +{"current_steps": 18610, "total_steps": 37885, "loss": 0.1095, "lr": 1.203848224719136e-06, "epoch": 2.456117196779728, "percentage": 49.12, "elapsed_time": "0:27:11", "remaining_time": "0:28:09", "throughput": 5616.19, "total_tokens": 9160688} +{"current_steps": 18615, "total_steps": 37885, "loss": 0.0001, "lr": 1.2033971784848985e-06, "epoch": 2.4567770885574767, "percentage": 49.14, "elapsed_time": "0:27:11", "remaining_time": "0:28:08", "throughput": 5616.5, "total_tokens": 9163056} +{"current_steps": 18620, "total_steps": 37885, "loss": 0.0004, "lr": 1.2029460890810826e-06, "epoch": 2.457436980335225, "percentage": 49.15, "elapsed_time": "0:27:11", "remaining_time": "0:28:08", "throughput": 5616.94, "total_tokens": 9165680} +{"current_steps": 18625, "total_steps": 37885, "loss": 0.0215, "lr": 1.202494956603429e-06, "epoch": 2.4580968721129732, "percentage": 49.16, "elapsed_time": "0:27:12", "remaining_time": "0:28:07", "throughput": 5617.2, "total_tokens": 9167984} +{"current_steps": 18630, "total_steps": 37885, "loss": 0.0013, "lr": 1.2020437811476872e-06, "epoch": 2.458756763890722, "percentage": 49.18, "elapsed_time": "0:27:12", "remaining_time": "0:28:07", "throughput": 5617.65, "total_tokens": 9170608} +{"current_steps": 18635, "total_steps": 37885, "loss": 0.0002, "lr": 1.2015925628096157e-06, "epoch": 2.4594166556684702, "percentage": 49.19, "elapsed_time": "0:27:12", "remaining_time": "0:28:06", "throughput": 5617.94, "total_tokens": 9172976} +{"current_steps": 18640, "total_steps": 37885, "loss": 0.0003, "lr": 1.2011413016849829e-06, "epoch": 2.460076547446219, "percentage": 49.2, "elapsed_time": "0:27:13", "remaining_time": "0:28:06", "throughput": 5618.38, "total_tokens": 9175600} +{"current_steps": 18645, "total_steps": 37885, "loss": 0.0007, "lr": 1.2006899978695653e-06, "epoch": 2.4607364392239672, "percentage": 49.21, "elapsed_time": "0:27:13", "remaining_time": "0:28:05", "throughput": 5618.64, "total_tokens": 9177904} +{"current_steps": 18650, "total_steps": 37885, "loss": 0.0005, "lr": 1.200238651459149e-06, "epoch": 2.4613963310017155, "percentage": 49.23, "elapsed_time": "0:27:13", "remaining_time": "0:28:05", "throughput": 5619.01, "total_tokens": 9180400} +{"current_steps": 18655, "total_steps": 37885, "loss": 0.0782, "lr": 1.1997872625495284e-06, "epoch": 2.4620562227794642, "percentage": 49.24, "elapsed_time": "0:27:14", "remaining_time": "0:28:04", "throughput": 5619.36, "total_tokens": 9182896} +{"current_steps": 18660, "total_steps": 37885, "loss": 0.0021, "lr": 1.1993358312365087e-06, "epoch": 2.4627161145572125, "percentage": 49.25, "elapsed_time": "0:27:14", "remaining_time": "0:28:03", "throughput": 5619.7, "total_tokens": 9185328} +{"current_steps": 18665, "total_steps": 37885, "loss": 0.0001, "lr": 1.198884357615902e-06, "epoch": 2.4633760063349612, "percentage": 49.27, "elapsed_time": "0:27:14", "remaining_time": "0:28:03", "throughput": 5620.03, "total_tokens": 9187760} +{"current_steps": 18670, "total_steps": 37885, "loss": 0.0844, "lr": 1.1984328417835307e-06, "epoch": 2.4640358981127095, "percentage": 49.28, "elapsed_time": "0:27:15", "remaining_time": "0:28:02", "throughput": 5620.18, "total_tokens": 9189872} +{"current_steps": 18675, "total_steps": 37885, "loss": 0.1752, "lr": 1.1979812838352257e-06, "epoch": 2.464695789890458, "percentage": 49.29, "elapsed_time": "0:27:15", "remaining_time": "0:28:02", "throughput": 5620.45, "total_tokens": 9192176} +{"current_steps": 18680, "total_steps": 37885, "loss": 0.0296, "lr": 1.1975296838668266e-06, "epoch": 2.4653556816682065, "percentage": 49.31, "elapsed_time": "0:27:15", "remaining_time": "0:28:01", "throughput": 5620.73, "total_tokens": 9194480} +{"current_steps": 18685, "total_steps": 37885, "loss": 0.0712, "lr": 1.1970780419741828e-06, "epoch": 2.466015573445955, "percentage": 49.32, "elapsed_time": "0:27:16", "remaining_time": "0:28:01", "throughput": 5621.12, "total_tokens": 9196976} +{"current_steps": 18690, "total_steps": 37885, "loss": 0.0004, "lr": 1.1966263582531517e-06, "epoch": 2.4666754652237035, "percentage": 49.33, "elapsed_time": "0:27:16", "remaining_time": "0:28:00", "throughput": 5621.36, "total_tokens": 9199216} +{"current_steps": 18695, "total_steps": 37885, "loss": 0.0064, "lr": 1.1961746327996e-06, "epoch": 2.467335357001452, "percentage": 49.35, "elapsed_time": "0:27:16", "remaining_time": "0:28:00", "throughput": 5621.71, "total_tokens": 9201648} +{"current_steps": 18700, "total_steps": 37885, "loss": 0.0253, "lr": 1.1957228657094027e-06, "epoch": 2.4679952487792, "percentage": 49.36, "elapsed_time": "0:27:17", "remaining_time": "0:27:59", "throughput": 5621.88, "total_tokens": 9203760} +{"current_steps": 18705, "total_steps": 37885, "loss": 0.0004, "lr": 1.1952710570784447e-06, "epoch": 2.468655140556949, "percentage": 49.37, "elapsed_time": "0:27:17", "remaining_time": "0:27:59", "throughput": 5622.12, "total_tokens": 9206000} +{"current_steps": 18710, "total_steps": 37885, "loss": 0.1172, "lr": 1.194819207002619e-06, "epoch": 2.469315032334697, "percentage": 49.39, "elapsed_time": "0:27:17", "remaining_time": "0:27:58", "throughput": 5622.52, "total_tokens": 9208496} +{"current_steps": 18715, "total_steps": 37885, "loss": 0.1238, "lr": 1.194367315577827e-06, "epoch": 2.469974924112446, "percentage": 49.4, "elapsed_time": "0:27:18", "remaining_time": "0:27:57", "throughput": 5622.98, "total_tokens": 9211120} +{"current_steps": 18720, "total_steps": 37885, "loss": 0.1403, "lr": 1.1939153828999801e-06, "epoch": 2.470634815890194, "percentage": 49.41, "elapsed_time": "0:27:18", "remaining_time": "0:27:57", "throughput": 5623.44, "total_tokens": 9213744} +{"current_steps": 18725, "total_steps": 37885, "loss": 0.0015, "lr": 1.1934634090649973e-06, "epoch": 2.4712947076679423, "percentage": 49.43, "elapsed_time": "0:27:18", "remaining_time": "0:27:56", "throughput": 5623.62, "total_tokens": 9215856} +{"current_steps": 18730, "total_steps": 37885, "loss": 0.1295, "lr": 1.1930113941688072e-06, "epoch": 2.471954599445691, "percentage": 49.44, "elapsed_time": "0:27:19", "remaining_time": "0:27:56", "throughput": 5623.9, "total_tokens": 9218160} +{"current_steps": 18735, "total_steps": 37885, "loss": 0.0516, "lr": 1.1925593383073458e-06, "epoch": 2.4726144912234393, "percentage": 49.45, "elapsed_time": "0:27:19", "remaining_time": "0:27:55", "throughput": 5624.4, "total_tokens": 9220848} +{"current_steps": 18740, "total_steps": 37885, "loss": 0.0631, "lr": 1.1921072415765595e-06, "epoch": 2.4732743830011876, "percentage": 49.47, "elapsed_time": "0:27:19", "remaining_time": "0:27:55", "throughput": 5624.78, "total_tokens": 9223344} +{"current_steps": 18745, "total_steps": 37885, "loss": 0.0004, "lr": 1.1916551040724026e-06, "epoch": 2.4739342747789363, "percentage": 49.48, "elapsed_time": "0:27:20", "remaining_time": "0:27:54", "throughput": 5625.07, "total_tokens": 9225648} +{"current_steps": 18750, "total_steps": 37885, "loss": 0.0413, "lr": 1.191202925890837e-06, "epoch": 2.4745941665566846, "percentage": 49.49, "elapsed_time": "0:27:20", "remaining_time": "0:27:54", "throughput": 5625.39, "total_tokens": 9228016} +{"current_steps": 18755, "total_steps": 37885, "loss": 0.0693, "lr": 1.1907507071278358e-06, "epoch": 2.475254058334433, "percentage": 49.51, "elapsed_time": "0:27:20", "remaining_time": "0:27:53", "throughput": 5625.61, "total_tokens": 9230192} +{"current_steps": 18760, "total_steps": 37885, "loss": 0.0002, "lr": 1.1902984478793776e-06, "epoch": 2.4759139501121816, "percentage": 49.52, "elapsed_time": "0:27:21", "remaining_time": "0:27:53", "throughput": 5625.95, "total_tokens": 9232624} +{"current_steps": 18765, "total_steps": 37885, "loss": 0.0532, "lr": 1.1898461482414524e-06, "epoch": 2.47657384188993, "percentage": 49.53, "elapsed_time": "0:27:21", "remaining_time": "0:27:52", "throughput": 5626.27, "total_tokens": 9234992} +{"current_steps": 18770, "total_steps": 37885, "loss": 0.0014, "lr": 1.1893938083100568e-06, "epoch": 2.4772337336676786, "percentage": 49.54, "elapsed_time": "0:27:21", "remaining_time": "0:27:51", "throughput": 5626.59, "total_tokens": 9237360} +{"current_steps": 18775, "total_steps": 37885, "loss": 0.0253, "lr": 1.188941428181197e-06, "epoch": 2.477893625445427, "percentage": 49.56, "elapsed_time": "0:27:22", "remaining_time": "0:27:51", "throughput": 5626.88, "total_tokens": 9239664} +{"current_steps": 18780, "total_steps": 37885, "loss": 0.0016, "lr": 1.188489007950887e-06, "epoch": 2.478553517223175, "percentage": 49.57, "elapsed_time": "0:27:22", "remaining_time": "0:27:50", "throughput": 5627.16, "total_tokens": 9241968} +{"current_steps": 18785, "total_steps": 37885, "loss": 0.0, "lr": 1.1880365477151501e-06, "epoch": 2.479213409000924, "percentage": 49.58, "elapsed_time": "0:27:22", "remaining_time": "0:27:50", "throughput": 5627.49, "total_tokens": 9244336} +{"current_steps": 18790, "total_steps": 37885, "loss": 0.0007, "lr": 1.1875840475700175e-06, "epoch": 2.479873300778672, "percentage": 49.6, "elapsed_time": "0:27:23", "remaining_time": "0:27:49", "throughput": 5627.95, "total_tokens": 9246960} +{"current_steps": 18795, "total_steps": 37885, "loss": 0.0004, "lr": 1.1871315076115293e-06, "epoch": 2.480533192556421, "percentage": 49.61, "elapsed_time": "0:27:23", "remaining_time": "0:27:49", "throughput": 5628.24, "total_tokens": 9249264} +{"current_steps": 18800, "total_steps": 37885, "loss": 0.0001, "lr": 1.186678927935734e-06, "epoch": 2.481193084334169, "percentage": 49.62, "elapsed_time": "0:27:23", "remaining_time": "0:27:48", "throughput": 5628.48, "total_tokens": 9251504} +{"current_steps": 18805, "total_steps": 37885, "loss": 0.0001, "lr": 1.1862263086386875e-06, "epoch": 2.4818529761119175, "percentage": 49.64, "elapsed_time": "0:27:24", "remaining_time": "0:27:48", "throughput": 5628.62, "total_tokens": 9253552} +{"current_steps": 18810, "total_steps": 37885, "loss": 0.0002, "lr": 1.1857736498164559e-06, "epoch": 2.482512867889666, "percentage": 49.65, "elapsed_time": "0:27:24", "remaining_time": "0:27:47", "throughput": 5628.97, "total_tokens": 9255984} +{"current_steps": 18815, "total_steps": 37885, "loss": 0.0003, "lr": 1.1853209515651122e-06, "epoch": 2.4831727596674145, "percentage": 49.66, "elapsed_time": "0:27:24", "remaining_time": "0:27:46", "throughput": 5629.3, "total_tokens": 9258352} +{"current_steps": 18820, "total_steps": 37885, "loss": 0.0001, "lr": 1.1848682139807387e-06, "epoch": 2.483832651445163, "percentage": 49.68, "elapsed_time": "0:27:25", "remaining_time": "0:27:46", "throughput": 5629.64, "total_tokens": 9260784} +{"current_steps": 18825, "total_steps": 37885, "loss": 0.0008, "lr": 1.1844154371594254e-06, "epoch": 2.4844925432229115, "percentage": 49.69, "elapsed_time": "0:27:25", "remaining_time": "0:27:45", "throughput": 5630.11, "total_tokens": 9263408} +{"current_steps": 18830, "total_steps": 37885, "loss": 0.0673, "lr": 1.183962621197271e-06, "epoch": 2.4851524350006597, "percentage": 49.7, "elapsed_time": "0:27:25", "remaining_time": "0:27:45", "throughput": 5630.32, "total_tokens": 9265584} +{"current_steps": 18835, "total_steps": 37885, "loss": 0.1876, "lr": 1.1835097661903826e-06, "epoch": 2.4858123267784085, "percentage": 49.72, "elapsed_time": "0:27:25", "remaining_time": "0:27:44", "throughput": 5630.6, "total_tokens": 9267888} +{"current_steps": 18840, "total_steps": 37885, "loss": 0.0001, "lr": 1.1830568722348748e-06, "epoch": 2.4864722185561567, "percentage": 49.73, "elapsed_time": "0:27:26", "remaining_time": "0:27:44", "throughput": 5630.91, "total_tokens": 9270256} +{"current_steps": 18845, "total_steps": 37885, "loss": 0.0006, "lr": 1.182603939426872e-06, "epoch": 2.4871321103339055, "percentage": 49.74, "elapsed_time": "0:27:26", "remaining_time": "0:27:43", "throughput": 5631.41, "total_tokens": 9272944} +{"current_steps": 18850, "total_steps": 37885, "loss": 0.0002, "lr": 1.1821509678625048e-06, "epoch": 2.4877920021116537, "percentage": 49.76, "elapsed_time": "0:27:26", "remaining_time": "0:27:43", "throughput": 5631.77, "total_tokens": 9275376} +{"current_steps": 18855, "total_steps": 37885, "loss": 0.0368, "lr": 1.181697957637914e-06, "epoch": 2.488451893889402, "percentage": 49.77, "elapsed_time": "0:27:27", "remaining_time": "0:27:42", "throughput": 5632.04, "total_tokens": 9277680} +{"current_steps": 18860, "total_steps": 37885, "loss": 0.2397, "lr": 1.1812449088492474e-06, "epoch": 2.4891117856671507, "percentage": 49.78, "elapsed_time": "0:27:27", "remaining_time": "0:27:42", "throughput": 5632.39, "total_tokens": 9280112} +{"current_steps": 18865, "total_steps": 37885, "loss": 0.0019, "lr": 1.1807918215926614e-06, "epoch": 2.489771677444899, "percentage": 49.8, "elapsed_time": "0:27:27", "remaining_time": "0:27:41", "throughput": 5632.75, "total_tokens": 9282544} +{"current_steps": 18870, "total_steps": 37885, "loss": 0.1361, "lr": 1.1803386959643204e-06, "epoch": 2.4904315692226473, "percentage": 49.81, "elapsed_time": "0:27:28", "remaining_time": "0:27:40", "throughput": 5633.11, "total_tokens": 9284976} +{"current_steps": 18875, "total_steps": 37885, "loss": 0.0009, "lr": 1.179885532060397e-06, "epoch": 2.491091461000396, "percentage": 49.82, "elapsed_time": "0:27:28", "remaining_time": "0:27:40", "throughput": 5633.5, "total_tokens": 9287472} +{"current_steps": 18880, "total_steps": 37885, "loss": 0.0829, "lr": 1.1794323299770724e-06, "epoch": 2.4917513527781443, "percentage": 49.84, "elapsed_time": "0:27:28", "remaining_time": "0:27:39", "throughput": 5633.96, "total_tokens": 9290096} +{"current_steps": 18885, "total_steps": 37885, "loss": 0.0012, "lr": 1.1789790898105346e-06, "epoch": 2.492411244555893, "percentage": 49.85, "elapsed_time": "0:27:29", "remaining_time": "0:27:39", "throughput": 5634.29, "total_tokens": 9292464} +{"current_steps": 18890, "total_steps": 37885, "loss": 0.0004, "lr": 1.1785258116569816e-06, "epoch": 2.4930711363336413, "percentage": 49.86, "elapsed_time": "0:27:29", "remaining_time": "0:27:38", "throughput": 5634.64, "total_tokens": 9294896} +{"current_steps": 18895, "total_steps": 37885, "loss": 0.0015, "lr": 1.1780724956126173e-06, "epoch": 2.4937310281113896, "percentage": 49.87, "elapsed_time": "0:27:29", "remaining_time": "0:27:38", "throughput": 5635.06, "total_tokens": 9297456} +{"current_steps": 18900, "total_steps": 37885, "loss": 0.1238, "lr": 1.1776191417736558e-06, "epoch": 2.4943909198891383, "percentage": 49.89, "elapsed_time": "0:27:30", "remaining_time": "0:27:37", "throughput": 5635.43, "total_tokens": 9299888} +{"current_steps": 18905, "total_steps": 37885, "loss": 0.0385, "lr": 1.1771657502363175e-06, "epoch": 2.4950508116668866, "percentage": 49.9, "elapsed_time": "0:27:30", "remaining_time": "0:27:37", "throughput": 5635.96, "total_tokens": 9302640} +{"current_steps": 18910, "total_steps": 37885, "loss": 0.0002, "lr": 1.1767123210968315e-06, "epoch": 2.495710703444635, "percentage": 49.91, "elapsed_time": "0:27:30", "remaining_time": "0:27:36", "throughput": 5636.49, "total_tokens": 9305392} +{"current_steps": 18915, "total_steps": 37885, "loss": 0.0991, "lr": 1.1762588544514352e-06, "epoch": 2.4963705952223836, "percentage": 49.93, "elapsed_time": "0:27:31", "remaining_time": "0:27:36", "throughput": 5636.88, "total_tokens": 9307888} +{"current_steps": 18920, "total_steps": 37885, "loss": 0.0002, "lr": 1.1758053503963733e-06, "epoch": 2.497030487000132, "percentage": 49.94, "elapsed_time": "0:27:31", "remaining_time": "0:27:35", "throughput": 5637.17, "total_tokens": 9310192} +{"current_steps": 18925, "total_steps": 37885, "loss": 0.1931, "lr": 1.1753518090278991e-06, "epoch": 2.4976903787778806, "percentage": 49.95, "elapsed_time": "0:27:31", "remaining_time": "0:27:34", "throughput": 5637.57, "total_tokens": 9312688} +{"current_steps": 18930, "total_steps": 37885, "loss": 0.0463, "lr": 1.1748982304422729e-06, "epoch": 2.498350270555629, "percentage": 49.97, "elapsed_time": "0:27:32", "remaining_time": "0:27:34", "throughput": 5637.98, "total_tokens": 9315248} +{"current_steps": 18935, "total_steps": 37885, "loss": 0.0443, "lr": 1.174444614735764e-06, "epoch": 2.499010162333377, "percentage": 49.98, "elapsed_time": "0:27:32", "remaining_time": "0:27:33", "throughput": 5638.5, "total_tokens": 9318000} +{"current_steps": 18940, "total_steps": 37885, "loss": 0.0663, "lr": 1.1739909620046485e-06, "epoch": 2.499670054111126, "percentage": 49.99, "elapsed_time": "0:27:32", "remaining_time": "0:27:33", "throughput": 5639.03, "total_tokens": 9320752} +{"current_steps": 18945, "total_steps": 37885, "loss": 0.0352, "lr": 1.1735372723452114e-06, "epoch": 2.500329945888874, "percentage": 50.01, "elapsed_time": "0:27:33", "remaining_time": "0:27:32", "throughput": 5639.64, "total_tokens": 9323632} +{"current_steps": 18950, "total_steps": 37885, "loss": 0.0962, "lr": 1.1730835458537454e-06, "epoch": 2.500989837666623, "percentage": 50.02, "elapsed_time": "0:27:33", "remaining_time": "0:27:32", "throughput": 5640.09, "total_tokens": 9326256} +{"current_steps": 18950, "total_steps": 37885, "eval_loss": 0.11418119072914124, "epoch": 2.500989837666623, "percentage": 50.02, "elapsed_time": "0:27:41", "remaining_time": "0:27:40", "throughput": 5613.2, "total_tokens": 9326256} +{"current_steps": 18955, "total_steps": 37885, "loss": 0.0332, "lr": 1.1726297826265497e-06, "epoch": 2.501649729444371, "percentage": 50.03, "elapsed_time": "0:28:15", "remaining_time": "0:28:13", "throughput": 5500.73, "total_tokens": 9328688} +{"current_steps": 18960, "total_steps": 37885, "loss": 0.0693, "lr": 1.1721759827599326e-06, "epoch": 2.5023096212221194, "percentage": 50.05, "elapsed_time": "0:28:16", "remaining_time": "0:28:13", "throughput": 5501.2, "total_tokens": 9331312} +{"current_steps": 18965, "total_steps": 37885, "loss": 0.0561, "lr": 1.1717221463502102e-06, "epoch": 2.502969512999868, "percentage": 50.06, "elapsed_time": "0:28:16", "remaining_time": "0:28:12", "throughput": 5501.65, "total_tokens": 9333872} +{"current_steps": 18970, "total_steps": 37885, "loss": 0.0002, "lr": 1.1712682734937058e-06, "epoch": 2.5036294047776164, "percentage": 50.07, "elapsed_time": "0:28:16", "remaining_time": "0:28:11", "throughput": 5501.94, "total_tokens": 9336176} +{"current_steps": 18975, "total_steps": 37885, "loss": 0.0008, "lr": 1.1708143642867506e-06, "epoch": 2.504289296555365, "percentage": 50.09, "elapsed_time": "0:28:17", "remaining_time": "0:28:11", "throughput": 5502.43, "total_tokens": 9338800} +{"current_steps": 18980, "total_steps": 37885, "loss": 0.0002, "lr": 1.1703604188256833e-06, "epoch": 2.5049491883331134, "percentage": 50.1, "elapsed_time": "0:28:17", "remaining_time": "0:28:10", "throughput": 5502.8, "total_tokens": 9341232} +{"current_steps": 18985, "total_steps": 37885, "loss": 0.0045, "lr": 1.169906437206851e-06, "epoch": 2.5056090801108617, "percentage": 50.11, "elapsed_time": "0:28:17", "remaining_time": "0:28:10", "throughput": 5503.18, "total_tokens": 9343664} +{"current_steps": 18990, "total_steps": 37885, "loss": 0.0005, "lr": 1.1694524195266077e-06, "epoch": 2.5062689718886104, "percentage": 50.13, "elapsed_time": "0:28:18", "remaining_time": "0:28:09", "throughput": 5503.52, "total_tokens": 9346096} +{"current_steps": 18995, "total_steps": 37885, "loss": 0.1795, "lr": 1.1689983658813152e-06, "epoch": 2.5069288636663587, "percentage": 50.14, "elapsed_time": "0:28:18", "remaining_time": "0:28:09", "throughput": 5503.92, "total_tokens": 9348592} +{"current_steps": 19000, "total_steps": 37885, "loss": 0.0014, "lr": 1.1685442763673436e-06, "epoch": 2.5075887554441074, "percentage": 50.15, "elapsed_time": "0:28:18", "remaining_time": "0:28:08", "throughput": 5504.31, "total_tokens": 9351088} +{"current_steps": 19005, "total_steps": 37885, "loss": 0.0008, "lr": 1.16809015108107e-06, "epoch": 2.5082486472218557, "percentage": 50.16, "elapsed_time": "0:28:19", "remaining_time": "0:28:08", "throughput": 5504.74, "total_tokens": 9353648} +{"current_steps": 19010, "total_steps": 37885, "loss": 0.0019, "lr": 1.1676359901188785e-06, "epoch": 2.508908538999604, "percentage": 50.18, "elapsed_time": "0:28:19", "remaining_time": "0:28:07", "throughput": 5505.17, "total_tokens": 9356208} +{"current_steps": 19015, "total_steps": 37885, "loss": 0.0006, "lr": 1.1671817935771623e-06, "epoch": 2.5095684307773523, "percentage": 50.19, "elapsed_time": "0:28:19", "remaining_time": "0:28:06", "throughput": 5505.36, "total_tokens": 9358320} +{"current_steps": 19020, "total_steps": 37885, "loss": 0.046, "lr": 1.166727561552321e-06, "epoch": 2.510228322555101, "percentage": 50.2, "elapsed_time": "0:28:20", "remaining_time": "0:28:06", "throughput": 5505.69, "total_tokens": 9360688} +{"current_steps": 19025, "total_steps": 37885, "loss": 0.0188, "lr": 1.1662732941407625e-06, "epoch": 2.5108882143328493, "percentage": 50.22, "elapsed_time": "0:28:20", "remaining_time": "0:28:05", "throughput": 5506.13, "total_tokens": 9363248} +{"current_steps": 19030, "total_steps": 37885, "loss": 0.0165, "lr": 1.165818991438901e-06, "epoch": 2.511548106110598, "percentage": 50.23, "elapsed_time": "0:28:20", "remaining_time": "0:28:05", "throughput": 5506.6, "total_tokens": 9365872} +{"current_steps": 19035, "total_steps": 37885, "loss": 0.0002, "lr": 1.1653646535431593e-06, "epoch": 2.5122079978883463, "percentage": 50.24, "elapsed_time": "0:28:21", "remaining_time": "0:28:04", "throughput": 5507.0, "total_tokens": 9368368} +{"current_steps": 19040, "total_steps": 37885, "loss": 0.0004, "lr": 1.1649102805499676e-06, "epoch": 2.5128678896660945, "percentage": 50.26, "elapsed_time": "0:28:21", "remaining_time": "0:28:04", "throughput": 5507.5, "total_tokens": 9371056} +{"current_steps": 19045, "total_steps": 37885, "loss": 0.0001, "lr": 1.1644558725557627e-06, "epoch": 2.5135277814438433, "percentage": 50.27, "elapsed_time": "0:28:21", "remaining_time": "0:28:03", "throughput": 5507.97, "total_tokens": 9373680} +{"current_steps": 19050, "total_steps": 37885, "loss": 0.0, "lr": 1.16400142965699e-06, "epoch": 2.5141876732215915, "percentage": 50.28, "elapsed_time": "0:28:22", "remaining_time": "0:28:02", "throughput": 5508.23, "total_tokens": 9375920} +{"current_steps": 19055, "total_steps": 37885, "loss": 0.1645, "lr": 1.1635469519501015e-06, "epoch": 2.5148475649993403, "percentage": 50.3, "elapsed_time": "0:28:22", "remaining_time": "0:28:02", "throughput": 5508.73, "total_tokens": 9378608} +{"current_steps": 19060, "total_steps": 37885, "loss": 0.0565, "lr": 1.1630924395315565e-06, "epoch": 2.5155074567770885, "percentage": 50.31, "elapsed_time": "0:28:22", "remaining_time": "0:28:01", "throughput": 5508.95, "total_tokens": 9380784} +{"current_steps": 19065, "total_steps": 37885, "loss": 0.0412, "lr": 1.1626378924978223e-06, "epoch": 2.516167348554837, "percentage": 50.32, "elapsed_time": "0:28:23", "remaining_time": "0:28:01", "throughput": 5509.32, "total_tokens": 9383216} +{"current_steps": 19070, "total_steps": 37885, "loss": 0.0311, "lr": 1.1621833109453734e-06, "epoch": 2.5168272403325855, "percentage": 50.34, "elapsed_time": "0:28:23", "remaining_time": "0:28:00", "throughput": 5509.71, "total_tokens": 9385712} +{"current_steps": 19075, "total_steps": 37885, "loss": 0.0527, "lr": 1.161728694970691e-06, "epoch": 2.517487132110334, "percentage": 50.35, "elapsed_time": "0:28:23", "remaining_time": "0:28:00", "throughput": 5509.93, "total_tokens": 9387888} +{"current_steps": 19080, "total_steps": 37885, "loss": 0.0464, "lr": 1.1612740446702645e-06, "epoch": 2.5181470238880825, "percentage": 50.36, "elapsed_time": "0:28:24", "remaining_time": "0:27:59", "throughput": 5510.22, "total_tokens": 9390192} +{"current_steps": 19085, "total_steps": 37885, "loss": 0.0352, "lr": 1.1608193601405894e-06, "epoch": 2.518806915665831, "percentage": 50.38, "elapsed_time": "0:28:24", "remaining_time": "0:27:59", "throughput": 5510.69, "total_tokens": 9392816} +{"current_steps": 19090, "total_steps": 37885, "loss": 0.0001, "lr": 1.1603646414781701e-06, "epoch": 2.519466807443579, "percentage": 50.39, "elapsed_time": "0:28:24", "remaining_time": "0:27:58", "throughput": 5511.01, "total_tokens": 9395184} +{"current_steps": 19095, "total_steps": 37885, "loss": 0.0002, "lr": 1.1599098887795164e-06, "epoch": 2.520126699221328, "percentage": 50.4, "elapsed_time": "0:28:25", "remaining_time": "0:27:57", "throughput": 5511.16, "total_tokens": 9397232} +{"current_steps": 19100, "total_steps": 37885, "loss": 0.0002, "lr": 1.1594551021411473e-06, "epoch": 2.520786590999076, "percentage": 50.42, "elapsed_time": "0:28:25", "remaining_time": "0:27:57", "throughput": 5511.53, "total_tokens": 9399664} +{"current_steps": 19105, "total_steps": 37885, "loss": 0.0001, "lr": 1.1590002816595874e-06, "epoch": 2.521446482776825, "percentage": 50.43, "elapsed_time": "0:28:25", "remaining_time": "0:27:56", "throughput": 5511.81, "total_tokens": 9401968} +{"current_steps": 19110, "total_steps": 37885, "loss": 0.1127, "lr": 1.158545427431369e-06, "epoch": 2.522106374554573, "percentage": 50.44, "elapsed_time": "0:28:26", "remaining_time": "0:27:56", "throughput": 5512.18, "total_tokens": 9404400} +{"current_steps": 19115, "total_steps": 37885, "loss": 0.073, "lr": 1.1580905395530317e-06, "epoch": 2.5227662663323214, "percentage": 50.46, "elapsed_time": "0:28:26", "remaining_time": "0:27:55", "throughput": 5512.53, "total_tokens": 9406832} +{"current_steps": 19120, "total_steps": 37885, "loss": 0.0006, "lr": 1.1576356181211223e-06, "epoch": 2.52342615811007, "percentage": 50.47, "elapsed_time": "0:28:26", "remaining_time": "0:27:55", "throughput": 5512.89, "total_tokens": 9409264} +{"current_steps": 19125, "total_steps": 37885, "loss": 0.1069, "lr": 1.1571806632321941e-06, "epoch": 2.5240860498878184, "percentage": 50.48, "elapsed_time": "0:28:27", "remaining_time": "0:27:54", "throughput": 5513.32, "total_tokens": 9411824} +{"current_steps": 19130, "total_steps": 37885, "loss": 0.0614, "lr": 1.1567256749828088e-06, "epoch": 2.524745941665567, "percentage": 50.49, "elapsed_time": "0:28:27", "remaining_time": "0:27:53", "throughput": 5513.71, "total_tokens": 9414320} +{"current_steps": 19135, "total_steps": 37885, "loss": 0.0487, "lr": 1.1562706534695337e-06, "epoch": 2.5254058334433154, "percentage": 50.51, "elapsed_time": "0:28:27", "remaining_time": "0:27:53", "throughput": 5514.03, "total_tokens": 9416688} +{"current_steps": 19140, "total_steps": 37885, "loss": 0.0504, "lr": 1.1558155987889437e-06, "epoch": 2.5260657252210637, "percentage": 50.52, "elapsed_time": "0:28:28", "remaining_time": "0:27:52", "throughput": 5514.38, "total_tokens": 9419120} +{"current_steps": 19145, "total_steps": 37885, "loss": 0.0001, "lr": 1.1553605110376216e-06, "epoch": 2.526725616998812, "percentage": 50.53, "elapsed_time": "0:28:28", "remaining_time": "0:27:52", "throughput": 5514.75, "total_tokens": 9421552} +{"current_steps": 19150, "total_steps": 37885, "loss": 0.0725, "lr": 1.154905390312156e-06, "epoch": 2.5273855087765607, "percentage": 50.55, "elapsed_time": "0:28:28", "remaining_time": "0:27:51", "throughput": 5515.18, "total_tokens": 9424112} +{"current_steps": 19155, "total_steps": 37885, "loss": 0.0382, "lr": 1.1544502367091428e-06, "epoch": 2.528045400554309, "percentage": 50.56, "elapsed_time": "0:28:29", "remaining_time": "0:27:51", "throughput": 5515.44, "total_tokens": 9426352} +{"current_steps": 19160, "total_steps": 37885, "loss": 0.0905, "lr": 1.1539950503251858e-06, "epoch": 2.5287052923320577, "percentage": 50.57, "elapsed_time": "0:28:29", "remaining_time": "0:27:50", "throughput": 5515.94, "total_tokens": 9429040} +{"current_steps": 19165, "total_steps": 37885, "loss": 0.0015, "lr": 1.153539831256894e-06, "epoch": 2.529365184109806, "percentage": 50.59, "elapsed_time": "0:28:29", "remaining_time": "0:27:50", "throughput": 5516.2, "total_tokens": 9431280} +{"current_steps": 19170, "total_steps": 37885, "loss": 0.023, "lr": 1.1530845796008853e-06, "epoch": 2.530025075887554, "percentage": 50.6, "elapsed_time": "0:28:30", "remaining_time": "0:27:49", "throughput": 5516.53, "total_tokens": 9433648} +{"current_steps": 19175, "total_steps": 37885, "loss": 0.0277, "lr": 1.1526292954537827e-06, "epoch": 2.530684967665303, "percentage": 50.61, "elapsed_time": "0:28:30", "remaining_time": "0:27:48", "throughput": 5517.06, "total_tokens": 9436400} +{"current_steps": 19180, "total_steps": 37885, "loss": 0.0384, "lr": 1.1521739789122179e-06, "epoch": 2.531344859443051, "percentage": 50.63, "elapsed_time": "0:28:30", "remaining_time": "0:27:48", "throughput": 5517.46, "total_tokens": 9438896} +{"current_steps": 19185, "total_steps": 37885, "loss": 0.0001, "lr": 1.1517186300728276e-06, "epoch": 2.5320047512208, "percentage": 50.64, "elapsed_time": "0:28:31", "remaining_time": "0:27:47", "throughput": 5517.71, "total_tokens": 9441136} +{"current_steps": 19190, "total_steps": 37885, "loss": 0.0023, "lr": 1.151263249032257e-06, "epoch": 2.532664642998548, "percentage": 50.65, "elapsed_time": "0:28:31", "remaining_time": "0:27:47", "throughput": 5517.98, "total_tokens": 9443376} +{"current_steps": 19195, "total_steps": 37885, "loss": 0.049, "lr": 1.150807835887157e-06, "epoch": 2.5333245347762965, "percentage": 50.67, "elapsed_time": "0:28:31", "remaining_time": "0:27:46", "throughput": 5518.34, "total_tokens": 9445808} +{"current_steps": 19200, "total_steps": 37885, "loss": 0.1468, "lr": 1.1503523907341858e-06, "epoch": 2.533984426554045, "percentage": 50.68, "elapsed_time": "0:28:32", "remaining_time": "0:27:46", "throughput": 5518.56, "total_tokens": 9447984} +{"current_steps": 19205, "total_steps": 37885, "loss": 0.0019, "lr": 1.1498969136700087e-06, "epoch": 2.5346443183317935, "percentage": 50.69, "elapsed_time": "0:28:32", "remaining_time": "0:27:45", "throughput": 5518.89, "total_tokens": 9450352} +{"current_steps": 19210, "total_steps": 37885, "loss": 0.0008, "lr": 1.1494414047912967e-06, "epoch": 2.535304210109542, "percentage": 50.71, "elapsed_time": "0:28:32", "remaining_time": "0:27:44", "throughput": 5519.35, "total_tokens": 9452976} +{"current_steps": 19215, "total_steps": 37885, "loss": 0.0011, "lr": 1.1489858641947292e-06, "epoch": 2.5359641018872905, "percentage": 50.72, "elapsed_time": "0:28:33", "remaining_time": "0:27:44", "throughput": 5519.79, "total_tokens": 9455536} +{"current_steps": 19220, "total_steps": 37885, "loss": 0.0336, "lr": 1.1485302919769906e-06, "epoch": 2.5366239936650388, "percentage": 50.73, "elapsed_time": "0:28:33", "remaining_time": "0:27:43", "throughput": 5520.28, "total_tokens": 9458224} +{"current_steps": 19225, "total_steps": 37885, "loss": 0.0001, "lr": 1.1480746882347733e-06, "epoch": 2.5372838854427875, "percentage": 50.75, "elapsed_time": "0:28:33", "remaining_time": "0:27:43", "throughput": 5520.6, "total_tokens": 9460592} +{"current_steps": 19230, "total_steps": 37885, "loss": 0.0004, "lr": 1.1476190530647754e-06, "epoch": 2.5379437772205358, "percentage": 50.76, "elapsed_time": "0:28:34", "remaining_time": "0:27:42", "throughput": 5521.04, "total_tokens": 9463152} +{"current_steps": 19235, "total_steps": 37885, "loss": 0.077, "lr": 1.1471633865637027e-06, "epoch": 2.5386036689982845, "percentage": 50.77, "elapsed_time": "0:28:34", "remaining_time": "0:27:42", "throughput": 5521.47, "total_tokens": 9465712} +{"current_steps": 19240, "total_steps": 37885, "loss": 0.0003, "lr": 1.146707688828267e-06, "epoch": 2.5392635607760328, "percentage": 50.79, "elapsed_time": "0:28:34", "remaining_time": "0:27:41", "throughput": 5521.96, "total_tokens": 9468400} +{"current_steps": 19245, "total_steps": 37885, "loss": 0.0, "lr": 1.1462519599551864e-06, "epoch": 2.539923452553781, "percentage": 50.8, "elapsed_time": "0:28:35", "remaining_time": "0:27:41", "throughput": 5522.24, "total_tokens": 9470704} +{"current_steps": 19250, "total_steps": 37885, "loss": 0.2087, "lr": 1.1457962000411864e-06, "epoch": 2.5405833443315298, "percentage": 50.81, "elapsed_time": "0:28:35", "remaining_time": "0:27:40", "throughput": 5522.5, "total_tokens": 9472944} +{"current_steps": 19255, "total_steps": 37885, "loss": 0.0457, "lr": 1.1453404091829987e-06, "epoch": 2.541243236109278, "percentage": 50.82, "elapsed_time": "0:28:35", "remaining_time": "0:27:39", "throughput": 5522.89, "total_tokens": 9475440} +{"current_steps": 19260, "total_steps": 37885, "loss": 0.0014, "lr": 1.1448845874773623e-06, "epoch": 2.5419031278870268, "percentage": 50.84, "elapsed_time": "0:28:35", "remaining_time": "0:27:39", "throughput": 5523.2, "total_tokens": 9477808} +{"current_steps": 19265, "total_steps": 37885, "loss": 0.0019, "lr": 1.1444287350210208e-06, "epoch": 2.542563019664775, "percentage": 50.85, "elapsed_time": "0:28:36", "remaining_time": "0:27:38", "throughput": 5523.63, "total_tokens": 9480368} +{"current_steps": 19270, "total_steps": 37885, "loss": 0.0003, "lr": 1.143972851910726e-06, "epoch": 2.5432229114425233, "percentage": 50.86, "elapsed_time": "0:28:36", "remaining_time": "0:27:38", "throughput": 5524.01, "total_tokens": 9482864} +{"current_steps": 19275, "total_steps": 37885, "loss": 0.0, "lr": 1.143516938243236e-06, "epoch": 2.5438828032202716, "percentage": 50.88, "elapsed_time": "0:28:36", "remaining_time": "0:27:37", "throughput": 5524.47, "total_tokens": 9485488} +{"current_steps": 19280, "total_steps": 37885, "loss": 0.0001, "lr": 1.1430609941153154e-06, "epoch": 2.5445426949980203, "percentage": 50.89, "elapsed_time": "0:28:37", "remaining_time": "0:27:37", "throughput": 5524.79, "total_tokens": 9487856} +{"current_steps": 19285, "total_steps": 37885, "loss": 0.0015, "lr": 1.1426050196237347e-06, "epoch": 2.545202586775769, "percentage": 50.9, "elapsed_time": "0:28:37", "remaining_time": "0:27:36", "throughput": 5524.96, "total_tokens": 9489968} +{"current_steps": 19290, "total_steps": 37885, "loss": 0.0698, "lr": 1.142149014865271e-06, "epoch": 2.5458624785535173, "percentage": 50.92, "elapsed_time": "0:28:37", "remaining_time": "0:27:36", "throughput": 5525.36, "total_tokens": 9492464} +{"current_steps": 19295, "total_steps": 37885, "loss": 0.0744, "lr": 1.1416929799367086e-06, "epoch": 2.5465223703312656, "percentage": 50.93, "elapsed_time": "0:28:38", "remaining_time": "0:27:35", "throughput": 5525.76, "total_tokens": 9495024} +{"current_steps": 19300, "total_steps": 37885, "loss": 0.0001, "lr": 1.141236914934837e-06, "epoch": 2.547182262109014, "percentage": 50.94, "elapsed_time": "0:28:38", "remaining_time": "0:27:34", "throughput": 5526.22, "total_tokens": 9497648} +{"current_steps": 19305, "total_steps": 37885, "loss": 0.0009, "lr": 1.1407808199564532e-06, "epoch": 2.5478421538867626, "percentage": 50.96, "elapsed_time": "0:28:38", "remaining_time": "0:27:34", "throughput": 5526.45, "total_tokens": 9499824} +{"current_steps": 19310, "total_steps": 37885, "loss": 0.1192, "lr": 1.1403246950983598e-06, "epoch": 2.548502045664511, "percentage": 50.97, "elapsed_time": "0:28:39", "remaining_time": "0:27:33", "throughput": 5526.7, "total_tokens": 9502064} +{"current_steps": 19315, "total_steps": 37885, "loss": 0.0003, "lr": 1.1398685404573657e-06, "epoch": 2.5491619374422596, "percentage": 50.98, "elapsed_time": "0:28:39", "remaining_time": "0:27:33", "throughput": 5527.08, "total_tokens": 9504560} +{"current_steps": 19320, "total_steps": 37885, "loss": 0.0002, "lr": 1.139412356130287e-06, "epoch": 2.549821829220008, "percentage": 51.0, "elapsed_time": "0:28:39", "remaining_time": "0:27:32", "throughput": 5527.49, "total_tokens": 9507120} +{"current_steps": 19325, "total_steps": 37885, "loss": 0.0831, "lr": 1.138956142213945e-06, "epoch": 2.550481720997756, "percentage": 51.01, "elapsed_time": "0:28:40", "remaining_time": "0:27:32", "throughput": 5527.85, "total_tokens": 9509552} +{"current_steps": 19330, "total_steps": 37885, "loss": 0.0001, "lr": 1.1384998988051684e-06, "epoch": 2.551141612775505, "percentage": 51.02, "elapsed_time": "0:28:40", "remaining_time": "0:27:31", "throughput": 5528.37, "total_tokens": 9512304} +{"current_steps": 19335, "total_steps": 37885, "loss": 0.0749, "lr": 1.1380436260007914e-06, "epoch": 2.551801504553253, "percentage": 51.04, "elapsed_time": "0:28:40", "remaining_time": "0:27:31", "throughput": 5528.73, "total_tokens": 9514736} +{"current_steps": 19340, "total_steps": 37885, "loss": 0.0802, "lr": 1.1375873238976542e-06, "epoch": 2.552461396331002, "percentage": 51.05, "elapsed_time": "0:28:41", "remaining_time": "0:27:30", "throughput": 5529.1, "total_tokens": 9517232} +{"current_steps": 19345, "total_steps": 37885, "loss": 0.0005, "lr": 1.1371309925926034e-06, "epoch": 2.55312128810875, "percentage": 51.06, "elapsed_time": "0:28:41", "remaining_time": "0:27:29", "throughput": 5529.34, "total_tokens": 9519472} +{"current_steps": 19350, "total_steps": 37885, "loss": 0.0006, "lr": 1.1366746321824928e-06, "epoch": 2.5537811798864984, "percentage": 51.08, "elapsed_time": "0:28:41", "remaining_time": "0:27:29", "throughput": 5529.61, "total_tokens": 9521776} +{"current_steps": 19355, "total_steps": 37885, "loss": 0.0014, "lr": 1.1362182427641812e-06, "epoch": 2.554441071664247, "percentage": 51.09, "elapsed_time": "0:28:42", "remaining_time": "0:27:28", "throughput": 5529.96, "total_tokens": 9524208} +{"current_steps": 19360, "total_steps": 37885, "loss": 0.0002, "lr": 1.135761824434534e-06, "epoch": 2.5551009634419954, "percentage": 51.1, "elapsed_time": "0:28:42", "remaining_time": "0:27:28", "throughput": 5530.36, "total_tokens": 9526768} +{"current_steps": 19365, "total_steps": 37885, "loss": 0.0013, "lr": 1.135305377290423e-06, "epoch": 2.555760855219744, "percentage": 51.12, "elapsed_time": "0:28:42", "remaining_time": "0:27:27", "throughput": 5530.7, "total_tokens": 9529200} +{"current_steps": 19370, "total_steps": 37885, "loss": 0.1098, "lr": 1.1348489014287248e-06, "epoch": 2.5564207469974924, "percentage": 51.13, "elapsed_time": "0:28:43", "remaining_time": "0:27:27", "throughput": 5531.13, "total_tokens": 9531824} +{"current_steps": 19375, "total_steps": 37885, "loss": 0.0411, "lr": 1.1343923969463243e-06, "epoch": 2.5570806387752407, "percentage": 51.14, "elapsed_time": "0:28:43", "remaining_time": "0:27:26", "throughput": 5531.44, "total_tokens": 9534192} +{"current_steps": 19380, "total_steps": 37885, "loss": 0.0, "lr": 1.1339358639401103e-06, "epoch": 2.5577405305529894, "percentage": 51.15, "elapsed_time": "0:28:43", "remaining_time": "0:27:26", "throughput": 5531.82, "total_tokens": 9536688} +{"current_steps": 19385, "total_steps": 37885, "loss": 0.0505, "lr": 1.1334793025069794e-06, "epoch": 2.5584004223307377, "percentage": 51.17, "elapsed_time": "0:28:44", "remaining_time": "0:27:25", "throughput": 5532.23, "total_tokens": 9539248} +{"current_steps": 19390, "total_steps": 37885, "loss": 0.0818, "lr": 1.1330227127438332e-06, "epoch": 2.5590603141084864, "percentage": 51.18, "elapsed_time": "0:28:44", "remaining_time": "0:27:25", "throughput": 5532.7, "total_tokens": 9541936} +{"current_steps": 19395, "total_steps": 37885, "loss": 0.087, "lr": 1.1325660947475792e-06, "epoch": 2.5597202058862347, "percentage": 51.19, "elapsed_time": "0:28:44", "remaining_time": "0:27:24", "throughput": 5532.96, "total_tokens": 9544240} +{"current_steps": 19400, "total_steps": 37885, "loss": 0.0565, "lr": 1.1321094486151317e-06, "epoch": 2.560380097663983, "percentage": 51.21, "elapsed_time": "0:28:45", "remaining_time": "0:27:23", "throughput": 5533.27, "total_tokens": 9546608} +{"current_steps": 19405, "total_steps": 37885, "loss": 0.0001, "lr": 1.1316527744434104e-06, "epoch": 2.5610399894417313, "percentage": 51.22, "elapsed_time": "0:28:45", "remaining_time": "0:27:23", "throughput": 5533.62, "total_tokens": 9549040} +{"current_steps": 19410, "total_steps": 37885, "loss": 0.0177, "lr": 1.131196072329341e-06, "epoch": 2.56169988121948, "percentage": 51.23, "elapsed_time": "0:28:45", "remaining_time": "0:27:22", "throughput": 5534.12, "total_tokens": 9551792} +{"current_steps": 19415, "total_steps": 37885, "loss": 0.0007, "lr": 1.1307393423698555e-06, "epoch": 2.5623597729972287, "percentage": 51.25, "elapsed_time": "0:28:46", "remaining_time": "0:27:22", "throughput": 5534.57, "total_tokens": 9554480} +{"current_steps": 19420, "total_steps": 37885, "loss": 0.0, "lr": 1.1302825846618912e-06, "epoch": 2.563019664774977, "percentage": 51.26, "elapsed_time": "0:28:46", "remaining_time": "0:27:21", "throughput": 5534.97, "total_tokens": 9557040} +{"current_steps": 19425, "total_steps": 37885, "loss": 0.0352, "lr": 1.1298257993023917e-06, "epoch": 2.5636795565527253, "percentage": 51.27, "elapsed_time": "0:28:47", "remaining_time": "0:27:21", "throughput": 5535.37, "total_tokens": 9559600} +{"current_steps": 19430, "total_steps": 37885, "loss": 0.0736, "lr": 1.1293689863883062e-06, "epoch": 2.5643394483304736, "percentage": 51.29, "elapsed_time": "0:28:47", "remaining_time": "0:27:20", "throughput": 5535.74, "total_tokens": 9562096} +{"current_steps": 19435, "total_steps": 37885, "loss": 0.0001, "lr": 1.1289121460165907e-06, "epoch": 2.5649993401082223, "percentage": 51.3, "elapsed_time": "0:28:47", "remaining_time": "0:27:20", "throughput": 5536.01, "total_tokens": 9564400} +{"current_steps": 19440, "total_steps": 37885, "loss": 0.0911, "lr": 1.1284552782842054e-06, "epoch": 2.5656592318859706, "percentage": 51.31, "elapsed_time": "0:28:47", "remaining_time": "0:27:19", "throughput": 5536.33, "total_tokens": 9566768} +{"current_steps": 19445, "total_steps": 37885, "loss": 0.0004, "lr": 1.1279983832881174e-06, "epoch": 2.5663191236637193, "percentage": 51.33, "elapsed_time": "0:28:48", "remaining_time": "0:27:19", "throughput": 5536.91, "total_tokens": 9569648} +{"current_steps": 19450, "total_steps": 37885, "loss": 0.0241, "lr": 1.1275414611252996e-06, "epoch": 2.5669790154414676, "percentage": 51.34, "elapsed_time": "0:28:48", "remaining_time": "0:27:18", "throughput": 5537.48, "total_tokens": 9572528} +{"current_steps": 19455, "total_steps": 37885, "loss": 0.0002, "lr": 1.1270845118927304e-06, "epoch": 2.567638907219216, "percentage": 51.35, "elapsed_time": "0:28:49", "remaining_time": "0:27:17", "throughput": 5537.92, "total_tokens": 9575152} +{"current_steps": 19460, "total_steps": 37885, "loss": 0.0428, "lr": 1.1266275356873933e-06, "epoch": 2.5682987989969646, "percentage": 51.37, "elapsed_time": "0:28:49", "remaining_time": "0:27:17", "throughput": 5538.32, "total_tokens": 9577712} +{"current_steps": 19465, "total_steps": 37885, "loss": 0.001, "lr": 1.1261705326062792e-06, "epoch": 2.568958690774713, "percentage": 51.38, "elapsed_time": "0:28:49", "remaining_time": "0:27:16", "throughput": 5538.55, "total_tokens": 9579952} +{"current_steps": 19470, "total_steps": 37885, "loss": 0.1439, "lr": 1.1257135027463831e-06, "epoch": 2.5696185825524616, "percentage": 51.39, "elapsed_time": "0:28:50", "remaining_time": "0:27:16", "throughput": 5538.96, "total_tokens": 9582512} +{"current_steps": 19475, "total_steps": 37885, "loss": 0.0001, "lr": 1.1252564462047063e-06, "epoch": 2.57027847433021, "percentage": 51.41, "elapsed_time": "0:28:50", "remaining_time": "0:27:15", "throughput": 5539.12, "total_tokens": 9584624} +{"current_steps": 19480, "total_steps": 37885, "loss": 0.0707, "lr": 1.124799363078256e-06, "epoch": 2.570938366107958, "percentage": 51.42, "elapsed_time": "0:28:50", "remaining_time": "0:27:15", "throughput": 5539.45, "total_tokens": 9587056} +{"current_steps": 19485, "total_steps": 37885, "loss": 0.0002, "lr": 1.1243422534640443e-06, "epoch": 2.571598257885707, "percentage": 51.43, "elapsed_time": "0:28:51", "remaining_time": "0:27:14", "throughput": 5539.65, "total_tokens": 9589232} +{"current_steps": 19490, "total_steps": 37885, "loss": 0.0706, "lr": 1.12388511745909e-06, "epoch": 2.572258149663455, "percentage": 51.45, "elapsed_time": "0:28:51", "remaining_time": "0:27:14", "throughput": 5540.08, "total_tokens": 9591792} +{"current_steps": 19495, "total_steps": 37885, "loss": 0.0012, "lr": 1.1234279551604164e-06, "epoch": 2.572918041441204, "percentage": 51.46, "elapsed_time": "0:28:51", "remaining_time": "0:27:13", "throughput": 5540.47, "total_tokens": 9594352} +{"current_steps": 19500, "total_steps": 37885, "loss": 0.0584, "lr": 1.1229707666650531e-06, "epoch": 2.573577933218952, "percentage": 51.47, "elapsed_time": "0:28:52", "remaining_time": "0:27:12", "throughput": 5541.0, "total_tokens": 9597168} +{"current_steps": 19505, "total_steps": 37885, "loss": 0.0311, "lr": 1.1225135520700355e-06, "epoch": 2.5742378249967004, "percentage": 51.48, "elapsed_time": "0:28:52", "remaining_time": "0:27:12", "throughput": 5541.4, "total_tokens": 9599728} +{"current_steps": 19510, "total_steps": 37885, "loss": 0.0561, "lr": 1.122056311472403e-06, "epoch": 2.574897716774449, "percentage": 51.5, "elapsed_time": "0:28:52", "remaining_time": "0:27:11", "throughput": 5541.71, "total_tokens": 9602096} +{"current_steps": 19515, "total_steps": 37885, "loss": 0.0667, "lr": 1.121599044969203e-06, "epoch": 2.5755576085521974, "percentage": 51.51, "elapsed_time": "0:28:53", "remaining_time": "0:27:11", "throughput": 5542.01, "total_tokens": 9604464} +{"current_steps": 19520, "total_steps": 37885, "loss": 0.0006, "lr": 1.1211417526574858e-06, "epoch": 2.576217500329946, "percentage": 51.52, "elapsed_time": "0:28:53", "remaining_time": "0:27:10", "throughput": 5542.35, "total_tokens": 9606896} +{"current_steps": 19525, "total_steps": 37885, "loss": 0.0013, "lr": 1.1206844346343089e-06, "epoch": 2.5768773921076944, "percentage": 51.54, "elapsed_time": "0:28:53", "remaining_time": "0:27:10", "throughput": 5542.78, "total_tokens": 9609520} +{"current_steps": 19530, "total_steps": 37885, "loss": 0.0006, "lr": 1.1202270909967347e-06, "epoch": 2.5775372838854427, "percentage": 51.55, "elapsed_time": "0:28:54", "remaining_time": "0:27:09", "throughput": 5543.32, "total_tokens": 9612336} +{"current_steps": 19535, "total_steps": 37885, "loss": 0.0861, "lr": 1.119769721841831e-06, "epoch": 2.5781971756631914, "percentage": 51.56, "elapsed_time": "0:28:54", "remaining_time": "0:27:09", "throughput": 5543.42, "total_tokens": 9614320} +{"current_steps": 19540, "total_steps": 37885, "loss": 0.0004, "lr": 1.119312327266671e-06, "epoch": 2.5788570674409397, "percentage": 51.58, "elapsed_time": "0:28:54", "remaining_time": "0:27:08", "throughput": 5543.95, "total_tokens": 9617136} +{"current_steps": 19545, "total_steps": 37885, "loss": 0.0001, "lr": 1.1188549073683338e-06, "epoch": 2.5795169592186884, "percentage": 51.59, "elapsed_time": "0:28:55", "remaining_time": "0:27:08", "throughput": 5544.38, "total_tokens": 9619760} +{"current_steps": 19550, "total_steps": 37885, "loss": 0.0, "lr": 1.1183974622439032e-06, "epoch": 2.5801768509964367, "percentage": 51.6, "elapsed_time": "0:28:55", "remaining_time": "0:27:07", "throughput": 5544.81, "total_tokens": 9622320} +{"current_steps": 19555, "total_steps": 37885, "loss": 0.0006, "lr": 1.1179399919904683e-06, "epoch": 2.580836742774185, "percentage": 51.62, "elapsed_time": "0:28:55", "remaining_time": "0:27:06", "throughput": 5545.22, "total_tokens": 9624880} +{"current_steps": 19560, "total_steps": 37885, "loss": 0.0987, "lr": 1.1174824967051244e-06, "epoch": 2.5814966345519332, "percentage": 51.63, "elapsed_time": "0:28:56", "remaining_time": "0:27:06", "throughput": 5545.58, "total_tokens": 9627312} +{"current_steps": 19565, "total_steps": 37885, "loss": 0.0, "lr": 1.117024976484971e-06, "epoch": 2.582156526329682, "percentage": 51.64, "elapsed_time": "0:28:56", "remaining_time": "0:27:05", "throughput": 5546.07, "total_tokens": 9630000} +{"current_steps": 19570, "total_steps": 37885, "loss": 0.1392, "lr": 1.1165674314271142e-06, "epoch": 2.5828164181074302, "percentage": 51.66, "elapsed_time": "0:28:56", "remaining_time": "0:27:05", "throughput": 5546.42, "total_tokens": 9632432} +{"current_steps": 19575, "total_steps": 37885, "loss": 0.1115, "lr": 1.1161098616286641e-06, "epoch": 2.583476309885179, "percentage": 51.67, "elapsed_time": "0:28:57", "remaining_time": "0:27:04", "throughput": 5546.85, "total_tokens": 9634992} +{"current_steps": 19580, "total_steps": 37885, "loss": 0.0023, "lr": 1.1156522671867366e-06, "epoch": 2.5841362016629272, "percentage": 51.68, "elapsed_time": "0:28:57", "remaining_time": "0:27:04", "throughput": 5547.14, "total_tokens": 9637296} +{"current_steps": 19585, "total_steps": 37885, "loss": 0.0005, "lr": 1.1151946481984528e-06, "epoch": 2.5847960934406755, "percentage": 51.7, "elapsed_time": "0:28:57", "remaining_time": "0:27:03", "throughput": 5547.47, "total_tokens": 9639664} +{"current_steps": 19590, "total_steps": 37885, "loss": 0.0971, "lr": 1.1147370047609391e-06, "epoch": 2.5854559852184242, "percentage": 51.71, "elapsed_time": "0:28:58", "remaining_time": "0:27:03", "throughput": 5547.88, "total_tokens": 9642224} +{"current_steps": 19595, "total_steps": 37885, "loss": 0.1041, "lr": 1.1142793369713273e-06, "epoch": 2.5861158769961725, "percentage": 51.72, "elapsed_time": "0:28:58", "remaining_time": "0:27:02", "throughput": 5548.2, "total_tokens": 9644592} +{"current_steps": 19600, "total_steps": 37885, "loss": 0.0555, "lr": 1.1138216449267536e-06, "epoch": 2.5867757687739212, "percentage": 51.74, "elapsed_time": "0:28:58", "remaining_time": "0:27:02", "throughput": 5548.62, "total_tokens": 9647152} +{"current_steps": 19605, "total_steps": 37885, "loss": 0.0006, "lr": 1.11336392872436e-06, "epoch": 2.5874356605516695, "percentage": 51.75, "elapsed_time": "0:28:58", "remaining_time": "0:27:01", "throughput": 5548.98, "total_tokens": 9649584} +{"current_steps": 19610, "total_steps": 37885, "loss": 0.0612, "lr": 1.112906188461293e-06, "epoch": 2.588095552329418, "percentage": 51.76, "elapsed_time": "0:28:59", "remaining_time": "0:27:00", "throughput": 5549.35, "total_tokens": 9652080} +{"current_steps": 19615, "total_steps": 37885, "loss": 0.0002, "lr": 1.1124484242347055e-06, "epoch": 2.5887554441071665, "percentage": 51.78, "elapsed_time": "0:28:59", "remaining_time": "0:27:00", "throughput": 5549.67, "total_tokens": 9654448} +{"current_steps": 19620, "total_steps": 37885, "loss": 0.0007, "lr": 1.1119906361417544e-06, "epoch": 2.589415335884915, "percentage": 51.79, "elapsed_time": "0:28:59", "remaining_time": "0:26:59", "throughput": 5550.09, "total_tokens": 9657008} +{"current_steps": 19625, "total_steps": 37885, "loss": 0.0618, "lr": 1.1115328242796017e-06, "epoch": 2.5900752276626635, "percentage": 51.8, "elapsed_time": "0:29:00", "remaining_time": "0:26:59", "throughput": 5550.41, "total_tokens": 9659376} +{"current_steps": 19630, "total_steps": 37885, "loss": 0.0002, "lr": 1.1110749887454146e-06, "epoch": 2.590735119440412, "percentage": 51.81, "elapsed_time": "0:29:00", "remaining_time": "0:26:58", "throughput": 5550.9, "total_tokens": 9662064} +{"current_steps": 19635, "total_steps": 37885, "loss": 0.0002, "lr": 1.110617129636365e-06, "epoch": 2.59139501121816, "percentage": 51.83, "elapsed_time": "0:29:00", "remaining_time": "0:26:58", "throughput": 5551.03, "total_tokens": 9664112} +{"current_steps": 19640, "total_steps": 37885, "loss": 0.0611, "lr": 1.1101592470496315e-06, "epoch": 2.592054902995909, "percentage": 51.84, "elapsed_time": "0:29:01", "remaining_time": "0:26:57", "throughput": 5551.23, "total_tokens": 9666288} +{"current_steps": 19645, "total_steps": 37885, "loss": 0.0007, "lr": 1.1097013410823952e-06, "epoch": 2.592714794773657, "percentage": 51.85, "elapsed_time": "0:29:01", "remaining_time": "0:26:57", "throughput": 5551.48, "total_tokens": 9668528} +{"current_steps": 19650, "total_steps": 37885, "loss": 0.0945, "lr": 1.1092434118318435e-06, "epoch": 2.593374686551406, "percentage": 51.87, "elapsed_time": "0:29:01", "remaining_time": "0:26:56", "throughput": 5551.77, "total_tokens": 9670832} +{"current_steps": 19655, "total_steps": 37885, "loss": 0.0826, "lr": 1.1087854593951688e-06, "epoch": 2.594034578329154, "percentage": 51.88, "elapsed_time": "0:29:02", "remaining_time": "0:26:55", "throughput": 5551.98, "total_tokens": 9673008} +{"current_steps": 19660, "total_steps": 37885, "loss": 0.2127, "lr": 1.108327483869568e-06, "epoch": 2.5946944701069024, "percentage": 51.89, "elapsed_time": "0:29:02", "remaining_time": "0:26:55", "throughput": 5551.57, "total_tokens": 9675568} +{"current_steps": 19665, "total_steps": 37885, "loss": 0.0008, "lr": 1.1078694853522435e-06, "epoch": 2.595354361884651, "percentage": 51.91, "elapsed_time": "0:29:03", "remaining_time": "0:26:55", "throughput": 5552.01, "total_tokens": 9678192} +{"current_steps": 19670, "total_steps": 37885, "loss": 0.0001, "lr": 1.1074114639404015e-06, "epoch": 2.5960142536623994, "percentage": 51.92, "elapsed_time": "0:29:03", "remaining_time": "0:26:54", "throughput": 5552.36, "total_tokens": 9680624} +{"current_steps": 19675, "total_steps": 37885, "loss": 0.0003, "lr": 1.1069534197312544e-06, "epoch": 2.596674145440148, "percentage": 51.93, "elapsed_time": "0:29:03", "remaining_time": "0:26:53", "throughput": 5552.71, "total_tokens": 9683056} +{"current_steps": 19680, "total_steps": 37885, "loss": 0.091, "lr": 1.1064953528220181e-06, "epoch": 2.5973340372178964, "percentage": 51.95, "elapsed_time": "0:29:04", "remaining_time": "0:26:53", "throughput": 5553.12, "total_tokens": 9685616} +{"current_steps": 19685, "total_steps": 37885, "loss": 0.0002, "lr": 1.1060372633099146e-06, "epoch": 2.5979939289956446, "percentage": 51.96, "elapsed_time": "0:29:04", "remaining_time": "0:26:52", "throughput": 5553.63, "total_tokens": 9688368} +{"current_steps": 19690, "total_steps": 37885, "loss": 0.0001, "lr": 1.10557915129217e-06, "epoch": 2.598653820773393, "percentage": 51.97, "elapsed_time": "0:29:04", "remaining_time": "0:26:52", "throughput": 5553.91, "total_tokens": 9690672} +{"current_steps": 19695, "total_steps": 37885, "loss": 0.0007, "lr": 1.1051210168660146e-06, "epoch": 2.5993137125511416, "percentage": 51.99, "elapsed_time": "0:29:05", "remaining_time": "0:26:51", "throughput": 5554.08, "total_tokens": 9692784} +{"current_steps": 19700, "total_steps": 37885, "loss": 0.0424, "lr": 1.1046628601286852e-06, "epoch": 2.59997360432889, "percentage": 52.0, "elapsed_time": "0:29:05", "remaining_time": "0:26:51", "throughput": 5554.43, "total_tokens": 9695216} +{"current_steps": 19705, "total_steps": 37885, "loss": 0.0569, "lr": 1.1042046811774213e-06, "epoch": 2.6006334961066386, "percentage": 52.01, "elapsed_time": "0:29:05", "remaining_time": "0:26:50", "throughput": 5554.67, "total_tokens": 9697456} +{"current_steps": 19710, "total_steps": 37885, "loss": 0.0568, "lr": 1.1037464801094684e-06, "epoch": 2.601293387884387, "percentage": 52.03, "elapsed_time": "0:29:06", "remaining_time": "0:26:50", "throughput": 5554.95, "total_tokens": 9699760} +{"current_steps": 19715, "total_steps": 37885, "loss": 0.0536, "lr": 1.1032882570220764e-06, "epoch": 2.601953279662135, "percentage": 52.04, "elapsed_time": "0:29:06", "remaining_time": "0:26:49", "throughput": 5555.25, "total_tokens": 9702128} +{"current_steps": 19720, "total_steps": 37885, "loss": 0.0003, "lr": 1.1028300120124997e-06, "epoch": 2.602613171439884, "percentage": 52.05, "elapsed_time": "0:29:06", "remaining_time": "0:26:49", "throughput": 5555.83, "total_tokens": 9705008} +{"current_steps": 19725, "total_steps": 37885, "loss": 0.0, "lr": 1.1023717451779977e-06, "epoch": 2.603273063217632, "percentage": 52.07, "elapsed_time": "0:29:07", "remaining_time": "0:26:48", "throughput": 5556.11, "total_tokens": 9707312} +{"current_steps": 19730, "total_steps": 37885, "loss": 0.0002, "lr": 1.1019134566158341e-06, "epoch": 2.603932954995381, "percentage": 52.08, "elapsed_time": "0:29:07", "remaining_time": "0:26:47", "throughput": 5556.56, "total_tokens": 9709936} +{"current_steps": 19735, "total_steps": 37885, "loss": 0.0001, "lr": 1.1014551464232773e-06, "epoch": 2.604592846773129, "percentage": 52.09, "elapsed_time": "0:29:07", "remaining_time": "0:26:47", "throughput": 5556.86, "total_tokens": 9712304} +{"current_steps": 19740, "total_steps": 37885, "loss": 0.0626, "lr": 1.1009968146976003e-06, "epoch": 2.6052527385508775, "percentage": 52.11, "elapsed_time": "0:29:08", "remaining_time": "0:26:46", "throughput": 5557.22, "total_tokens": 9714736} +{"current_steps": 19745, "total_steps": 37885, "loss": 0.1001, "lr": 1.100538461536081e-06, "epoch": 2.605912630328626, "percentage": 52.12, "elapsed_time": "0:29:08", "remaining_time": "0:26:46", "throughput": 5557.65, "total_tokens": 9717360} +{"current_steps": 19750, "total_steps": 37885, "loss": 0.0181, "lr": 1.1000800870360012e-06, "epoch": 2.6065725221063745, "percentage": 52.13, "elapsed_time": "0:29:08", "remaining_time": "0:26:45", "throughput": 5558.1, "total_tokens": 9719984} +{"current_steps": 19755, "total_steps": 37885, "loss": 0.0023, "lr": 1.0996216912946472e-06, "epoch": 2.607232413884123, "percentage": 52.14, "elapsed_time": "0:29:09", "remaining_time": "0:26:45", "throughput": 5558.41, "total_tokens": 9722352} +{"current_steps": 19760, "total_steps": 37885, "loss": 0.0001, "lr": 1.099163274409311e-06, "epoch": 2.6078923056618715, "percentage": 52.16, "elapsed_time": "0:29:09", "remaining_time": "0:26:44", "throughput": 5558.79, "total_tokens": 9724848} +{"current_steps": 19765, "total_steps": 37885, "loss": 0.1899, "lr": 1.098704836477288e-06, "epoch": 2.6085521974396197, "percentage": 52.17, "elapsed_time": "0:29:09", "remaining_time": "0:26:44", "throughput": 5559.24, "total_tokens": 9727472} +{"current_steps": 19770, "total_steps": 37885, "loss": 0.0, "lr": 1.098246377595878e-06, "epoch": 2.6092120892173685, "percentage": 52.18, "elapsed_time": "0:29:10", "remaining_time": "0:26:43", "throughput": 5559.55, "total_tokens": 9729840} +{"current_steps": 19775, "total_steps": 37885, "loss": 0.0006, "lr": 1.097787897862386e-06, "epoch": 2.6098719809951167, "percentage": 52.2, "elapsed_time": "0:29:10", "remaining_time": "0:26:43", "throughput": 5560.06, "total_tokens": 9732592} +{"current_steps": 19780, "total_steps": 37885, "loss": 0.1143, "lr": 1.097329397374121e-06, "epoch": 2.6105318727728655, "percentage": 52.21, "elapsed_time": "0:29:10", "remaining_time": "0:26:42", "throughput": 5560.37, "total_tokens": 9734960} +{"current_steps": 19785, "total_steps": 37885, "loss": 0.0001, "lr": 1.0968708762283955e-06, "epoch": 2.6111917645506137, "percentage": 52.22, "elapsed_time": "0:29:11", "remaining_time": "0:26:41", "throughput": 5560.69, "total_tokens": 9737328} +{"current_steps": 19790, "total_steps": 37885, "loss": 0.1288, "lr": 1.0964123345225285e-06, "epoch": 2.611851656328362, "percentage": 52.24, "elapsed_time": "0:29:11", "remaining_time": "0:26:41", "throughput": 5561.1, "total_tokens": 9739888} +{"current_steps": 19795, "total_steps": 37885, "loss": 0.1315, "lr": 1.0959537723538414e-06, "epoch": 2.6125115481061107, "percentage": 52.25, "elapsed_time": "0:29:11", "remaining_time": "0:26:40", "throughput": 5561.58, "total_tokens": 9742576} +{"current_steps": 19800, "total_steps": 37885, "loss": 0.014, "lr": 1.0954951898196614e-06, "epoch": 2.613171439883859, "percentage": 52.26, "elapsed_time": "0:29:12", "remaining_time": "0:26:40", "throughput": 5562.06, "total_tokens": 9745264} +{"current_steps": 19805, "total_steps": 37885, "loss": 0.1164, "lr": 1.0950365870173186e-06, "epoch": 2.6138313316616077, "percentage": 52.28, "elapsed_time": "0:29:12", "remaining_time": "0:26:39", "throughput": 5562.33, "total_tokens": 9747568} +{"current_steps": 19810, "total_steps": 37885, "loss": 0.0801, "lr": 1.0945779640441484e-06, "epoch": 2.614491223439356, "percentage": 52.29, "elapsed_time": "0:29:12", "remaining_time": "0:26:39", "throughput": 5562.78, "total_tokens": 9750192} +{"current_steps": 19815, "total_steps": 37885, "loss": 0.0024, "lr": 1.0941193209974902e-06, "epoch": 2.6151511152171043, "percentage": 52.3, "elapsed_time": "0:29:13", "remaining_time": "0:26:38", "throughput": 5563.2, "total_tokens": 9752752} +{"current_steps": 19820, "total_steps": 37885, "loss": 0.0005, "lr": 1.0936606579746877e-06, "epoch": 2.6158110069948526, "percentage": 52.32, "elapsed_time": "0:29:13", "remaining_time": "0:26:38", "throughput": 5563.71, "total_tokens": 9755504} +{"current_steps": 19825, "total_steps": 37885, "loss": 0.0253, "lr": 1.0932019750730888e-06, "epoch": 2.6164708987726013, "percentage": 52.33, "elapsed_time": "0:29:13", "remaining_time": "0:26:37", "throughput": 5564.06, "total_tokens": 9757936} +{"current_steps": 19830, "total_steps": 37885, "loss": 0.0, "lr": 1.0927432723900455e-06, "epoch": 2.6171307905503496, "percentage": 52.34, "elapsed_time": "0:29:14", "remaining_time": "0:26:37", "throughput": 5564.41, "total_tokens": 9760368} +{"current_steps": 19835, "total_steps": 37885, "loss": 0.0281, "lr": 1.0922845500229143e-06, "epoch": 2.6177906823280983, "percentage": 52.36, "elapsed_time": "0:29:14", "remaining_time": "0:26:36", "throughput": 5564.69, "total_tokens": 9762672} +{"current_steps": 19840, "total_steps": 37885, "loss": 0.0326, "lr": 1.0918258080690557e-06, "epoch": 2.6184505741058466, "percentage": 52.37, "elapsed_time": "0:29:14", "remaining_time": "0:26:35", "throughput": 5565.01, "total_tokens": 9765040} +{"current_steps": 19845, "total_steps": 37885, "loss": 0.0002, "lr": 1.0913670466258343e-06, "epoch": 2.619110465883595, "percentage": 52.38, "elapsed_time": "0:29:15", "remaining_time": "0:26:35", "throughput": 5565.38, "total_tokens": 9767536} +{"current_steps": 19850, "total_steps": 37885, "loss": 0.0271, "lr": 1.090908265790619e-06, "epoch": 2.6197703576613436, "percentage": 52.4, "elapsed_time": "0:29:15", "remaining_time": "0:26:34", "throughput": 5565.77, "total_tokens": 9770032} +{"current_steps": 19855, "total_steps": 37885, "loss": 0.0798, "lr": 1.0904494656607824e-06, "epoch": 2.620430249439092, "percentage": 52.41, "elapsed_time": "0:29:15", "remaining_time": "0:26:34", "throughput": 5566.2, "total_tokens": 9772656} +{"current_steps": 19860, "total_steps": 37885, "loss": 0.0459, "lr": 1.0899906463337016e-06, "epoch": 2.6210901412168406, "percentage": 52.42, "elapsed_time": "0:29:16", "remaining_time": "0:26:33", "throughput": 5566.71, "total_tokens": 9775408} +{"current_steps": 19865, "total_steps": 37885, "loss": 0.0001, "lr": 1.0895318079067576e-06, "epoch": 2.621750032994589, "percentage": 52.44, "elapsed_time": "0:29:16", "remaining_time": "0:26:33", "throughput": 5566.99, "total_tokens": 9777712} +{"current_steps": 19870, "total_steps": 37885, "loss": 0.1895, "lr": 1.0890729504773359e-06, "epoch": 2.622409924772337, "percentage": 52.45, "elapsed_time": "0:29:16", "remaining_time": "0:26:32", "throughput": 5567.33, "total_tokens": 9780144} +{"current_steps": 19875, "total_steps": 37885, "loss": 0.0005, "lr": 1.0886140741428257e-06, "epoch": 2.623069816550086, "percentage": 52.46, "elapsed_time": "0:29:17", "remaining_time": "0:26:32", "throughput": 5567.87, "total_tokens": 9782960} +{"current_steps": 19880, "total_steps": 37885, "loss": 0.0887, "lr": 1.08815517900062e-06, "epoch": 2.623729708327834, "percentage": 52.47, "elapsed_time": "0:29:17", "remaining_time": "0:26:31", "throughput": 5568.08, "total_tokens": 9785136} +{"current_steps": 19885, "total_steps": 37885, "loss": 0.0035, "lr": 1.0876962651481159e-06, "epoch": 2.624389600105583, "percentage": 52.49, "elapsed_time": "0:29:17", "remaining_time": "0:26:31", "throughput": 5568.5, "total_tokens": 9787696} +{"current_steps": 19890, "total_steps": 37885, "loss": 0.0009, "lr": 1.0872373326827143e-06, "epoch": 2.625049491883331, "percentage": 52.5, "elapsed_time": "0:29:18", "remaining_time": "0:26:30", "throughput": 5568.88, "total_tokens": 9790192} +{"current_steps": 19895, "total_steps": 37885, "loss": 0.0338, "lr": 1.0867783817018207e-06, "epoch": 2.6257093836610794, "percentage": 52.51, "elapsed_time": "0:29:18", "remaining_time": "0:26:29", "throughput": 5569.28, "total_tokens": 9792752} +{"current_steps": 19900, "total_steps": 37885, "loss": 0.1192, "lr": 1.086319412302844e-06, "epoch": 2.626369275438828, "percentage": 52.53, "elapsed_time": "0:29:18", "remaining_time": "0:26:29", "throughput": 5569.73, "total_tokens": 9795376} +{"current_steps": 19905, "total_steps": 37885, "loss": 0.0018, "lr": 1.085860424583197e-06, "epoch": 2.6270291672165764, "percentage": 52.54, "elapsed_time": "0:29:19", "remaining_time": "0:26:28", "throughput": 5570.1, "total_tokens": 9797872} +{"current_steps": 19910, "total_steps": 37885, "loss": 0.0488, "lr": 1.0854014186402968e-06, "epoch": 2.627689058994325, "percentage": 52.55, "elapsed_time": "0:29:19", "remaining_time": "0:26:28", "throughput": 5570.5, "total_tokens": 9800432} +{"current_steps": 19915, "total_steps": 37885, "loss": 0.0001, "lr": 1.0849423945715637e-06, "epoch": 2.6283489507720734, "percentage": 52.57, "elapsed_time": "0:29:19", "remaining_time": "0:26:27", "throughput": 5570.9, "total_tokens": 9802992} +{"current_steps": 19920, "total_steps": 37885, "loss": 0.0881, "lr": 1.0844833524744226e-06, "epoch": 2.6290088425498217, "percentage": 52.58, "elapsed_time": "0:29:20", "remaining_time": "0:26:27", "throughput": 5571.24, "total_tokens": 9805424} +{"current_steps": 19925, "total_steps": 37885, "loss": 0.0001, "lr": 1.0840242924463016e-06, "epoch": 2.6296687343275704, "percentage": 52.59, "elapsed_time": "0:29:20", "remaining_time": "0:26:26", "throughput": 5571.68, "total_tokens": 9808048} +{"current_steps": 19930, "total_steps": 37885, "loss": 0.0007, "lr": 1.0835652145846335e-06, "epoch": 2.6303286261053187, "percentage": 52.61, "elapsed_time": "0:29:20", "remaining_time": "0:26:26", "throughput": 5572.08, "total_tokens": 9810608} +{"current_steps": 19935, "total_steps": 37885, "loss": 0.1537, "lr": 1.0831061189868531e-06, "epoch": 2.6309885178830674, "percentage": 52.62, "elapsed_time": "0:29:21", "remaining_time": "0:26:25", "throughput": 5572.31, "total_tokens": 9812848} +{"current_steps": 19940, "total_steps": 37885, "loss": 0.1011, "lr": 1.0826470057504008e-06, "epoch": 2.6316484096608157, "percentage": 52.63, "elapsed_time": "0:29:21", "remaining_time": "0:26:25", "throughput": 5572.55, "total_tokens": 9815088} +{"current_steps": 19945, "total_steps": 37885, "loss": 0.0767, "lr": 1.0821878749727204e-06, "epoch": 2.632308301438564, "percentage": 52.65, "elapsed_time": "0:29:21", "remaining_time": "0:26:24", "throughput": 5572.89, "total_tokens": 9817520} +{"current_steps": 19950, "total_steps": 37885, "loss": 0.0003, "lr": 1.0817287267512583e-06, "epoch": 2.6329681932163123, "percentage": 52.66, "elapsed_time": "0:29:21", "remaining_time": "0:26:24", "throughput": 5573.29, "total_tokens": 9820080} +{"current_steps": 19955, "total_steps": 37885, "loss": 0.1129, "lr": 1.0812695611834664e-06, "epoch": 2.633628084994061, "percentage": 52.67, "elapsed_time": "0:29:22", "remaining_time": "0:26:23", "throughput": 5573.53, "total_tokens": 9822320} +{"current_steps": 19960, "total_steps": 37885, "loss": 0.0007, "lr": 1.0808103783667981e-06, "epoch": 2.6342879767718093, "percentage": 52.69, "elapsed_time": "0:29:22", "remaining_time": "0:26:22", "throughput": 5573.68, "total_tokens": 9824432} +{"current_steps": 19965, "total_steps": 37885, "loss": 0.0001, "lr": 1.0803511783987122e-06, "epoch": 2.634947868549558, "percentage": 52.7, "elapsed_time": "0:29:22", "remaining_time": "0:26:22", "throughput": 5574.08, "total_tokens": 9826992} +{"current_steps": 19970, "total_steps": 37885, "loss": 0.0016, "lr": 1.0798919613766707e-06, "epoch": 2.6356077603273063, "percentage": 52.71, "elapsed_time": "0:29:23", "remaining_time": "0:26:21", "throughput": 5574.41, "total_tokens": 9829424} +{"current_steps": 19975, "total_steps": 37885, "loss": 0.1057, "lr": 1.079432727398139e-06, "epoch": 2.6362676521050545, "percentage": 52.73, "elapsed_time": "0:29:23", "remaining_time": "0:26:21", "throughput": 5574.86, "total_tokens": 9832048} +{"current_steps": 19980, "total_steps": 37885, "loss": 0.0002, "lr": 1.078973476560586e-06, "epoch": 2.6369275438828033, "percentage": 52.74, "elapsed_time": "0:29:23", "remaining_time": "0:26:20", "throughput": 5575.13, "total_tokens": 9834352} +{"current_steps": 19985, "total_steps": 37885, "loss": 0.0002, "lr": 1.0785142089614843e-06, "epoch": 2.6375874356605515, "percentage": 52.75, "elapsed_time": "0:29:24", "remaining_time": "0:26:20", "throughput": 5575.47, "total_tokens": 9836784} +{"current_steps": 19990, "total_steps": 37885, "loss": 0.0017, "lr": 1.0780549246983105e-06, "epoch": 2.6382473274383003, "percentage": 52.76, "elapsed_time": "0:29:24", "remaining_time": "0:26:19", "throughput": 5575.77, "total_tokens": 9839152} +{"current_steps": 19995, "total_steps": 37885, "loss": 0.0005, "lr": 1.077595623868544e-06, "epoch": 2.6389072192160485, "percentage": 52.78, "elapsed_time": "0:29:24", "remaining_time": "0:26:19", "throughput": 5576.18, "total_tokens": 9841712} +{"current_steps": 20000, "total_steps": 37885, "loss": 0.1022, "lr": 1.0771363065696684e-06, "epoch": 2.639567110993797, "percentage": 52.79, "elapsed_time": "0:29:25", "remaining_time": "0:26:18", "throughput": 5576.52, "total_tokens": 9844144} +{"current_steps": 20005, "total_steps": 37885, "loss": 0.0979, "lr": 1.0766769728991705e-06, "epoch": 2.6402270027715455, "percentage": 52.8, "elapsed_time": "0:29:25", "remaining_time": "0:26:18", "throughput": 5576.84, "total_tokens": 9846512} +{"current_steps": 20010, "total_steps": 37885, "loss": 0.0752, "lr": 1.0762176229545398e-06, "epoch": 2.640886894549294, "percentage": 52.82, "elapsed_time": "0:29:25", "remaining_time": "0:26:17", "throughput": 5577.28, "total_tokens": 9849136} +{"current_steps": 20015, "total_steps": 37885, "loss": 0.0312, "lr": 1.0757582568332711e-06, "epoch": 2.6415467863270425, "percentage": 52.83, "elapsed_time": "0:29:26", "remaining_time": "0:26:16", "throughput": 5577.59, "total_tokens": 9851504} +{"current_steps": 20020, "total_steps": 37885, "loss": 0.0831, "lr": 1.0752988746328607e-06, "epoch": 2.642206678104791, "percentage": 52.84, "elapsed_time": "0:29:26", "remaining_time": "0:26:16", "throughput": 5577.89, "total_tokens": 9853872} +{"current_steps": 20025, "total_steps": 37885, "loss": 0.0416, "lr": 1.0748394764508095e-06, "epoch": 2.642866569882539, "percentage": 52.86, "elapsed_time": "0:29:26", "remaining_time": "0:26:15", "throughput": 5578.2, "total_tokens": 9856240} +{"current_steps": 20030, "total_steps": 37885, "loss": 0.0012, "lr": 1.0743800623846213e-06, "epoch": 2.643526461660288, "percentage": 52.87, "elapsed_time": "0:29:27", "remaining_time": "0:26:15", "throughput": 5578.45, "total_tokens": 9858480} +{"current_steps": 20035, "total_steps": 37885, "loss": 0.0004, "lr": 1.0739206325318038e-06, "epoch": 2.644186353438036, "percentage": 52.88, "elapsed_time": "0:29:27", "remaining_time": "0:26:14", "throughput": 5578.73, "total_tokens": 9860784} +{"current_steps": 20040, "total_steps": 37885, "loss": 0.0001, "lr": 1.0734611869898668e-06, "epoch": 2.644846245215785, "percentage": 52.9, "elapsed_time": "0:29:27", "remaining_time": "0:26:14", "throughput": 5579.17, "total_tokens": 9863408} +{"current_steps": 20045, "total_steps": 37885, "loss": 0.0003, "lr": 1.0730017258563253e-06, "epoch": 2.645506136993533, "percentage": 52.91, "elapsed_time": "0:29:28", "remaining_time": "0:26:13", "throughput": 5579.52, "total_tokens": 9865840} +{"current_steps": 20050, "total_steps": 37885, "loss": 0.0004, "lr": 1.0725422492286957e-06, "epoch": 2.6461660287712814, "percentage": 52.92, "elapsed_time": "0:29:28", "remaining_time": "0:26:13", "throughput": 5579.84, "total_tokens": 9868208} +{"current_steps": 20055, "total_steps": 37885, "loss": 0.0818, "lr": 1.0720827572044995e-06, "epoch": 2.64682592054903, "percentage": 52.94, "elapsed_time": "0:29:28", "remaining_time": "0:26:12", "throughput": 5580.01, "total_tokens": 9870320} +{"current_steps": 20060, "total_steps": 37885, "loss": 0.0457, "lr": 1.0716232498812598e-06, "epoch": 2.6474858123267784, "percentage": 52.95, "elapsed_time": "0:29:29", "remaining_time": "0:26:12", "throughput": 5580.35, "total_tokens": 9872752} +{"current_steps": 20065, "total_steps": 37885, "loss": 0.0002, "lr": 1.0711637273565037e-06, "epoch": 2.648145704104527, "percentage": 52.96, "elapsed_time": "0:29:29", "remaining_time": "0:26:11", "throughput": 5580.79, "total_tokens": 9875376} +{"current_steps": 20070, "total_steps": 37885, "loss": 0.002, "lr": 1.0707041897277623e-06, "epoch": 2.6488055958822754, "percentage": 52.98, "elapsed_time": "0:29:29", "remaining_time": "0:26:11", "throughput": 5581.06, "total_tokens": 9877680} +{"current_steps": 20075, "total_steps": 37885, "loss": 0.1485, "lr": 1.0702446370925682e-06, "epoch": 2.6494654876600237, "percentage": 52.99, "elapsed_time": "0:29:30", "remaining_time": "0:26:10", "throughput": 5581.43, "total_tokens": 9880176} +{"current_steps": 20080, "total_steps": 37885, "loss": 0.0, "lr": 1.069785069548459e-06, "epoch": 2.650125379437772, "percentage": 53.0, "elapsed_time": "0:29:30", "remaining_time": "0:26:09", "throughput": 5581.81, "total_tokens": 9882672} +{"current_steps": 20085, "total_steps": 37885, "loss": 0.0884, "lr": 1.0693254871929737e-06, "epoch": 2.6507852712155207, "percentage": 53.02, "elapsed_time": "0:29:30", "remaining_time": "0:26:09", "throughput": 5582.19, "total_tokens": 9885168} +{"current_steps": 20090, "total_steps": 37885, "loss": 0.0369, "lr": 1.068865890123656e-06, "epoch": 2.6514451629932694, "percentage": 53.03, "elapsed_time": "0:29:31", "remaining_time": "0:26:08", "throughput": 5582.59, "total_tokens": 9887728} +{"current_steps": 20095, "total_steps": 37885, "loss": 0.0975, "lr": 1.068406278438052e-06, "epoch": 2.6521050547710177, "percentage": 53.04, "elapsed_time": "0:29:31", "remaining_time": "0:26:08", "throughput": 5582.8, "total_tokens": 9889904} +{"current_steps": 20100, "total_steps": 37885, "loss": 0.0004, "lr": 1.0679466522337102e-06, "epoch": 2.652764946548766, "percentage": 53.06, "elapsed_time": "0:29:31", "remaining_time": "0:26:07", "throughput": 5583.1, "total_tokens": 9892272} +{"current_steps": 20105, "total_steps": 37885, "loss": 0.0012, "lr": 1.0674870116081838e-06, "epoch": 2.653424838326514, "percentage": 53.07, "elapsed_time": "0:29:32", "remaining_time": "0:26:07", "throughput": 5583.51, "total_tokens": 9894832} +{"current_steps": 20110, "total_steps": 37885, "loss": 0.0145, "lr": 1.067027356659028e-06, "epoch": 2.654084730104263, "percentage": 53.08, "elapsed_time": "0:29:32", "remaining_time": "0:26:06", "throughput": 5583.97, "total_tokens": 9897520} +{"current_steps": 20115, "total_steps": 37885, "loss": 0.0002, "lr": 1.066567687483801e-06, "epoch": 2.654744621882011, "percentage": 53.09, "elapsed_time": "0:29:32", "remaining_time": "0:26:06", "throughput": 5584.17, "total_tokens": 9899696} +{"current_steps": 20120, "total_steps": 37885, "loss": 0.0003, "lr": 1.0661080041800642e-06, "epoch": 2.65540451365976, "percentage": 53.11, "elapsed_time": "0:29:33", "remaining_time": "0:26:05", "throughput": 5584.68, "total_tokens": 9902448} +{"current_steps": 20125, "total_steps": 37885, "loss": 0.0975, "lr": 1.0656483068453828e-06, "epoch": 2.656064405437508, "percentage": 53.12, "elapsed_time": "0:29:33", "remaining_time": "0:26:05", "throughput": 5585.01, "total_tokens": 9904880} +{"current_steps": 20130, "total_steps": 37885, "loss": 0.0077, "lr": 1.065188595577323e-06, "epoch": 2.6567242972152565, "percentage": 53.13, "elapsed_time": "0:29:33", "remaining_time": "0:26:04", "throughput": 5585.39, "total_tokens": 9907376} +{"current_steps": 20135, "total_steps": 37885, "loss": 0.0001, "lr": 1.0647288704734563e-06, "epoch": 2.657384188993005, "percentage": 53.15, "elapsed_time": "0:29:34", "remaining_time": "0:26:03", "throughput": 5585.83, "total_tokens": 9910000} +{"current_steps": 20140, "total_steps": 37885, "loss": 0.0893, "lr": 1.0642691316313556e-06, "epoch": 2.6580440807707535, "percentage": 53.16, "elapsed_time": "0:29:34", "remaining_time": "0:26:03", "throughput": 5586.14, "total_tokens": 9912368} +{"current_steps": 20145, "total_steps": 37885, "loss": 0.057, "lr": 1.0638093791485964e-06, "epoch": 2.658703972548502, "percentage": 53.17, "elapsed_time": "0:29:34", "remaining_time": "0:26:02", "throughput": 5586.41, "total_tokens": 9914672} +{"current_steps": 20150, "total_steps": 37885, "loss": 0.0003, "lr": 1.0633496131227593e-06, "epoch": 2.6593638643262505, "percentage": 53.19, "elapsed_time": "0:29:35", "remaining_time": "0:26:02", "throughput": 5586.75, "total_tokens": 9917104} +{"current_steps": 20155, "total_steps": 37885, "loss": 0.0001, "lr": 1.0628898336514252e-06, "epoch": 2.6600237561039988, "percentage": 53.2, "elapsed_time": "0:29:35", "remaining_time": "0:26:01", "throughput": 5587.08, "total_tokens": 9919536} +{"current_steps": 20160, "total_steps": 37885, "loss": 0.0844, "lr": 1.0624300408321795e-06, "epoch": 2.6606836478817475, "percentage": 53.21, "elapsed_time": "0:29:35", "remaining_time": "0:26:01", "throughput": 5587.28, "total_tokens": 9921712} +{"current_steps": 20165, "total_steps": 37885, "loss": 0.0596, "lr": 1.0619702347626098e-06, "epoch": 2.6613435396594958, "percentage": 53.23, "elapsed_time": "0:29:36", "remaining_time": "0:26:00", "throughput": 5587.61, "total_tokens": 9924144} +{"current_steps": 20170, "total_steps": 37885, "loss": 0.0002, "lr": 1.0615104155403063e-06, "epoch": 2.6620034314372445, "percentage": 53.24, "elapsed_time": "0:29:36", "remaining_time": "0:26:00", "throughput": 5587.98, "total_tokens": 9926640} +{"current_steps": 20175, "total_steps": 37885, "loss": 0.1212, "lr": 1.0610505832628626e-06, "epoch": 2.6626633232149928, "percentage": 53.25, "elapsed_time": "0:29:36", "remaining_time": "0:25:59", "throughput": 5588.32, "total_tokens": 9929072} +{"current_steps": 20180, "total_steps": 37885, "loss": 0.0673, "lr": 1.0605907380278745e-06, "epoch": 2.663323214992741, "percentage": 53.27, "elapsed_time": "0:29:37", "remaining_time": "0:25:59", "throughput": 5588.6, "total_tokens": 9931376} +{"current_steps": 20185, "total_steps": 37885, "loss": 0.0001, "lr": 1.0601308799329413e-06, "epoch": 2.6639831067704898, "percentage": 53.28, "elapsed_time": "0:29:37", "remaining_time": "0:25:58", "throughput": 5588.8, "total_tokens": 9933552} +{"current_steps": 20190, "total_steps": 37885, "loss": 0.1238, "lr": 1.0596710090756641e-06, "epoch": 2.664642998548238, "percentage": 53.29, "elapsed_time": "0:29:37", "remaining_time": "0:25:58", "throughput": 5589.01, "total_tokens": 9935728} +{"current_steps": 20195, "total_steps": 37885, "loss": 0.0167, "lr": 1.0592111255536478e-06, "epoch": 2.6653028903259868, "percentage": 53.31, "elapsed_time": "0:29:38", "remaining_time": "0:25:57", "throughput": 5589.25, "total_tokens": 9937968} +{"current_steps": 20200, "total_steps": 37885, "loss": 0.1255, "lr": 1.0587512294644982e-06, "epoch": 2.665962782103735, "percentage": 53.32, "elapsed_time": "0:29:38", "remaining_time": "0:25:56", "throughput": 5589.53, "total_tokens": 9940272} +{"current_steps": 20205, "total_steps": 37885, "loss": 0.0168, "lr": 1.0582913209058257e-06, "epoch": 2.6666226738814833, "percentage": 53.33, "elapsed_time": "0:29:38", "remaining_time": "0:25:56", "throughput": 5589.9, "total_tokens": 9942768} +{"current_steps": 20210, "total_steps": 37885, "loss": 0.2539, "lr": 1.0578313999752427e-06, "epoch": 2.667282565659232, "percentage": 53.35, "elapsed_time": "0:29:39", "remaining_time": "0:25:55", "throughput": 5590.38, "total_tokens": 9945456} +{"current_steps": 20215, "total_steps": 37885, "loss": 0.001, "lr": 1.0573714667703638e-06, "epoch": 2.6679424574369803, "percentage": 53.36, "elapsed_time": "0:29:39", "remaining_time": "0:25:55", "throughput": 5590.84, "total_tokens": 9948144} +{"current_steps": 20220, "total_steps": 37885, "loss": 0.0813, "lr": 1.0569115213888067e-06, "epoch": 2.668602349214729, "percentage": 53.37, "elapsed_time": "0:29:39", "remaining_time": "0:25:54", "throughput": 5591.31, "total_tokens": 9950832} +{"current_steps": 20225, "total_steps": 37885, "loss": 0.1883, "lr": 1.0564515639281911e-06, "epoch": 2.6692622409924773, "percentage": 53.39, "elapsed_time": "0:29:40", "remaining_time": "0:25:54", "throughput": 5591.71, "total_tokens": 9953392} +{"current_steps": 20230, "total_steps": 37885, "loss": 0.0004, "lr": 1.0559915944861397e-06, "epoch": 2.6699221327702256, "percentage": 53.4, "elapsed_time": "0:29:40", "remaining_time": "0:25:53", "throughput": 5592.15, "total_tokens": 9956016} +{"current_steps": 20235, "total_steps": 37885, "loss": 0.0942, "lr": 1.0555316131602778e-06, "epoch": 2.670582024547974, "percentage": 53.41, "elapsed_time": "0:29:40", "remaining_time": "0:25:53", "throughput": 5592.52, "total_tokens": 9958512} +{"current_steps": 20240, "total_steps": 37885, "loss": 0.0016, "lr": 1.0550716200482335e-06, "epoch": 2.6712419163257226, "percentage": 53.42, "elapsed_time": "0:29:41", "remaining_time": "0:25:52", "throughput": 5592.89, "total_tokens": 9961008} +{"current_steps": 20245, "total_steps": 37885, "loss": 0.0428, "lr": 1.0546116152476366e-06, "epoch": 2.671901808103471, "percentage": 53.44, "elapsed_time": "0:29:41", "remaining_time": "0:25:52", "throughput": 5593.28, "total_tokens": 9963568} +{"current_steps": 20250, "total_steps": 37885, "loss": 0.1266, "lr": 1.0541515988561195e-06, "epoch": 2.6725616998812196, "percentage": 53.45, "elapsed_time": "0:29:41", "remaining_time": "0:25:51", "throughput": 5593.52, "total_tokens": 9965808} +{"current_steps": 20255, "total_steps": 37885, "loss": 0.0373, "lr": 1.053691570971318e-06, "epoch": 2.673221591658968, "percentage": 53.46, "elapsed_time": "0:29:42", "remaining_time": "0:25:51", "throughput": 5593.88, "total_tokens": 9968304} +{"current_steps": 20260, "total_steps": 37885, "loss": 0.0505, "lr": 1.0532315316908691e-06, "epoch": 2.673881483436716, "percentage": 53.48, "elapsed_time": "0:29:42", "remaining_time": "0:25:50", "throughput": 5594.16, "total_tokens": 9970608} +{"current_steps": 20265, "total_steps": 37885, "loss": 0.0933, "lr": 1.0527714811124132e-06, "epoch": 2.674541375214465, "percentage": 53.49, "elapsed_time": "0:29:42", "remaining_time": "0:25:49", "throughput": 5594.46, "total_tokens": 9972976} +{"current_steps": 20270, "total_steps": 37885, "loss": 0.0012, "lr": 1.0523114193335926e-06, "epoch": 2.675201266992213, "percentage": 53.5, "elapsed_time": "0:29:42", "remaining_time": "0:25:49", "throughput": 5594.83, "total_tokens": 9975472} +{"current_steps": 20275, "total_steps": 37885, "loss": 0.1191, "lr": 1.051851346452052e-06, "epoch": 2.675861158769962, "percentage": 53.52, "elapsed_time": "0:29:43", "remaining_time": "0:25:48", "throughput": 5595.13, "total_tokens": 9977840} +{"current_steps": 20280, "total_steps": 37885, "loss": 0.0011, "lr": 1.0513912625654386e-06, "epoch": 2.67652105054771, "percentage": 53.53, "elapsed_time": "0:29:43", "remaining_time": "0:25:48", "throughput": 5595.36, "total_tokens": 9980080} +{"current_steps": 20285, "total_steps": 37885, "loss": 0.0008, "lr": 1.0509311677714016e-06, "epoch": 2.6771809423254584, "percentage": 53.54, "elapsed_time": "0:29:43", "remaining_time": "0:25:47", "throughput": 5595.63, "total_tokens": 9982384} +{"current_steps": 20290, "total_steps": 37885, "loss": 0.0005, "lr": 1.050471062167594e-06, "epoch": 2.677840834103207, "percentage": 53.56, "elapsed_time": "0:29:44", "remaining_time": "0:25:47", "throughput": 5596.14, "total_tokens": 9985136} +{"current_steps": 20295, "total_steps": 37885, "loss": 0.1043, "lr": 1.050010945851668e-06, "epoch": 2.6785007258809554, "percentage": 53.57, "elapsed_time": "0:29:44", "remaining_time": "0:25:46", "throughput": 5596.57, "total_tokens": 9987760} +{"current_steps": 20300, "total_steps": 37885, "loss": 0.0001, "lr": 1.049550818921281e-06, "epoch": 2.679160617658704, "percentage": 53.58, "elapsed_time": "0:29:44", "remaining_time": "0:25:46", "throughput": 5596.97, "total_tokens": 9990320} +{"current_steps": 20305, "total_steps": 37885, "loss": 0.0004, "lr": 1.0490906814740916e-06, "epoch": 2.6798205094364524, "percentage": 53.6, "elapsed_time": "0:29:45", "remaining_time": "0:25:45", "throughput": 5597.33, "total_tokens": 9992816} +{"current_steps": 20310, "total_steps": 37885, "loss": 0.0006, "lr": 1.0486305336077609e-06, "epoch": 2.6804804012142007, "percentage": 53.61, "elapsed_time": "0:29:45", "remaining_time": "0:25:45", "throughput": 5597.59, "total_tokens": 9995120} +{"current_steps": 20315, "total_steps": 37885, "loss": 0.1659, "lr": 1.0481703754199513e-06, "epoch": 2.6811402929919494, "percentage": 53.62, "elapsed_time": "0:29:45", "remaining_time": "0:25:44", "throughput": 5597.89, "total_tokens": 9997488} +{"current_steps": 20320, "total_steps": 37885, "loss": 0.0007, "lr": 1.047710207008328e-06, "epoch": 2.6818001847696977, "percentage": 53.64, "elapsed_time": "0:29:46", "remaining_time": "0:25:44", "throughput": 5598.23, "total_tokens": 9999920} +{"current_steps": 20325, "total_steps": 37885, "loss": 0.1558, "lr": 1.0472500284705595e-06, "epoch": 2.6824600765474464, "percentage": 53.65, "elapsed_time": "0:29:46", "remaining_time": "0:25:43", "throughput": 5598.57, "total_tokens": 10002352} +{"current_steps": 20330, "total_steps": 37885, "loss": 0.0008, "lr": 1.046789839904314e-06, "epoch": 2.6831199683251947, "percentage": 53.66, "elapsed_time": "0:29:46", "remaining_time": "0:25:43", "throughput": 5598.8, "total_tokens": 10004592} +{"current_steps": 20335, "total_steps": 37885, "loss": 0.038, "lr": 1.0463296414072641e-06, "epoch": 2.683779860102943, "percentage": 53.68, "elapsed_time": "0:29:47", "remaining_time": "0:25:42", "throughput": 5599.14, "total_tokens": 10007024} +{"current_steps": 20340, "total_steps": 37885, "loss": 0.0877, "lr": 1.0458694330770832e-06, "epoch": 2.6844397518806917, "percentage": 53.69, "elapsed_time": "0:29:47", "remaining_time": "0:25:41", "throughput": 5599.61, "total_tokens": 10009712} +{"current_steps": 20345, "total_steps": 37885, "loss": 0.0001, "lr": 1.0454092150114473e-06, "epoch": 2.68509964365844, "percentage": 53.7, "elapsed_time": "0:29:47", "remaining_time": "0:25:41", "throughput": 5600.08, "total_tokens": 10012400} +{"current_steps": 20350, "total_steps": 37885, "loss": 0.0013, "lr": 1.0449489873080344e-06, "epoch": 2.6857595354361887, "percentage": 53.72, "elapsed_time": "0:29:48", "remaining_time": "0:25:40", "throughput": 5600.32, "total_tokens": 10014640} +{"current_steps": 20355, "total_steps": 37885, "loss": 0.0002, "lr": 1.0444887500645244e-06, "epoch": 2.686419427213937, "percentage": 53.73, "elapsed_time": "0:29:48", "remaining_time": "0:25:40", "throughput": 5600.72, "total_tokens": 10017200} +{"current_steps": 20360, "total_steps": 37885, "loss": 0.0006, "lr": 1.0440285033785994e-06, "epoch": 2.6870793189916853, "percentage": 53.74, "elapsed_time": "0:29:48", "remaining_time": "0:25:39", "throughput": 5601.18, "total_tokens": 10019888} +{"current_steps": 20365, "total_steps": 37885, "loss": 0.0873, "lr": 1.0435682473479433e-06, "epoch": 2.6877392107694336, "percentage": 53.75, "elapsed_time": "0:29:49", "remaining_time": "0:25:39", "throughput": 5601.38, "total_tokens": 10022064} +{"current_steps": 20370, "total_steps": 37885, "loss": 0.0001, "lr": 1.0431079820702425e-06, "epoch": 2.6883991025471823, "percentage": 53.77, "elapsed_time": "0:29:49", "remaining_time": "0:25:38", "throughput": 5601.72, "total_tokens": 10024496} +{"current_steps": 20375, "total_steps": 37885, "loss": 0.0003, "lr": 1.042647707643184e-06, "epoch": 2.6890589943249306, "percentage": 53.78, "elapsed_time": "0:29:49", "remaining_time": "0:25:38", "throughput": 5602.13, "total_tokens": 10027056} +{"current_steps": 20380, "total_steps": 37885, "loss": 0.0535, "lr": 1.0421874241644591e-06, "epoch": 2.6897188861026793, "percentage": 53.79, "elapsed_time": "0:29:50", "remaining_time": "0:25:37", "throughput": 5602.53, "total_tokens": 10029616} +{"current_steps": 20385, "total_steps": 37885, "loss": 0.0001, "lr": 1.0417271317317585e-06, "epoch": 2.6903787778804276, "percentage": 53.81, "elapsed_time": "0:29:50", "remaining_time": "0:25:37", "throughput": 5603.0, "total_tokens": 10032304} +{"current_steps": 20390, "total_steps": 37885, "loss": 0.0001, "lr": 1.0412668304427766e-06, "epoch": 2.691038669658176, "percentage": 53.82, "elapsed_time": "0:29:50", "remaining_time": "0:25:36", "throughput": 5603.36, "total_tokens": 10034800} +{"current_steps": 20395, "total_steps": 37885, "loss": 0.0005, "lr": 1.0408065203952086e-06, "epoch": 2.6916985614359246, "percentage": 53.83, "elapsed_time": "0:29:51", "remaining_time": "0:25:36", "throughput": 5603.8, "total_tokens": 10037424} +{"current_steps": 20400, "total_steps": 37885, "loss": 0.0002, "lr": 1.040346201686752e-06, "epoch": 2.692358453213673, "percentage": 53.85, "elapsed_time": "0:29:51", "remaining_time": "0:25:35", "throughput": 5604.2, "total_tokens": 10039984} +{"current_steps": 20405, "total_steps": 37885, "loss": 0.0596, "lr": 1.0398858744151067e-06, "epoch": 2.6930183449914216, "percentage": 53.86, "elapsed_time": "0:29:51", "remaining_time": "0:25:34", "throughput": 5604.65, "total_tokens": 10042672} +{"current_steps": 20410, "total_steps": 37885, "loss": 0.0004, "lr": 1.0394255386779728e-06, "epoch": 2.69367823676917, "percentage": 53.87, "elapsed_time": "0:29:52", "remaining_time": "0:25:34", "throughput": 5604.88, "total_tokens": 10044912} +{"current_steps": 20415, "total_steps": 37885, "loss": 0.0016, "lr": 1.0389651945730545e-06, "epoch": 2.694338128546918, "percentage": 53.89, "elapsed_time": "0:29:52", "remaining_time": "0:25:33", "throughput": 5605.15, "total_tokens": 10047216} +{"current_steps": 20420, "total_steps": 37885, "loss": 0.1896, "lr": 1.0385048421980554e-06, "epoch": 2.694998020324667, "percentage": 53.9, "elapsed_time": "0:29:52", "remaining_time": "0:25:33", "throughput": 5605.48, "total_tokens": 10049648} +{"current_steps": 20425, "total_steps": 37885, "loss": 0.0583, "lr": 1.0380444816506822e-06, "epoch": 2.695657912102415, "percentage": 53.91, "elapsed_time": "0:29:53", "remaining_time": "0:25:32", "throughput": 5605.88, "total_tokens": 10052208} +{"current_steps": 20430, "total_steps": 37885, "loss": 0.0242, "lr": 1.0375841130286436e-06, "epoch": 2.696317803880164, "percentage": 53.93, "elapsed_time": "0:29:53", "remaining_time": "0:25:32", "throughput": 5606.21, "total_tokens": 10054640} +{"current_steps": 20435, "total_steps": 37885, "loss": 0.1204, "lr": 1.0371237364296491e-06, "epoch": 2.696977695657912, "percentage": 53.94, "elapsed_time": "0:29:53", "remaining_time": "0:25:31", "throughput": 5606.54, "total_tokens": 10057072} +{"current_steps": 20440, "total_steps": 37885, "loss": 0.0581, "lr": 1.0366633519514104e-06, "epoch": 2.6976375874356604, "percentage": 53.95, "elapsed_time": "0:29:54", "remaining_time": "0:25:31", "throughput": 5606.81, "total_tokens": 10059376} +{"current_steps": 20445, "total_steps": 37885, "loss": 0.0596, "lr": 1.0362029596916407e-06, "epoch": 2.698297479213409, "percentage": 53.97, "elapsed_time": "0:29:54", "remaining_time": "0:25:30", "throughput": 5607.21, "total_tokens": 10061936} +{"current_steps": 20450, "total_steps": 37885, "loss": 0.0001, "lr": 1.0357425597480548e-06, "epoch": 2.6989573709911574, "percentage": 53.98, "elapsed_time": "0:29:54", "remaining_time": "0:25:30", "throughput": 5607.48, "total_tokens": 10064240} +{"current_steps": 20455, "total_steps": 37885, "loss": 0.0458, "lr": 1.0352821522183697e-06, "epoch": 2.699617262768906, "percentage": 53.99, "elapsed_time": "0:29:55", "remaining_time": "0:25:29", "throughput": 5607.78, "total_tokens": 10066608} +{"current_steps": 20460, "total_steps": 37885, "loss": 0.0203, "lr": 1.0348217372003032e-06, "epoch": 2.7002771545466544, "percentage": 54.01, "elapsed_time": "0:29:55", "remaining_time": "0:25:29", "throughput": 5608.01, "total_tokens": 10068848} +{"current_steps": 20465, "total_steps": 37885, "loss": 0.0227, "lr": 1.0343613147915748e-06, "epoch": 2.7009370463244027, "percentage": 54.02, "elapsed_time": "0:29:55", "remaining_time": "0:25:28", "throughput": 5608.27, "total_tokens": 10071152} +{"current_steps": 20470, "total_steps": 37885, "loss": 0.0001, "lr": 1.0339008850899067e-06, "epoch": 2.7015969381021514, "percentage": 54.03, "elapsed_time": "0:29:56", "remaining_time": "0:25:28", "throughput": 5608.66, "total_tokens": 10073712} +{"current_steps": 20475, "total_steps": 37885, "loss": 0.0004, "lr": 1.033440448193021e-06, "epoch": 2.7022568298798997, "percentage": 54.05, "elapsed_time": "0:29:56", "remaining_time": "0:25:27", "throughput": 5609.06, "total_tokens": 10076272} +{"current_steps": 20480, "total_steps": 37885, "loss": 0.0001, "lr": 1.0329800041986423e-06, "epoch": 2.7029167216576484, "percentage": 54.06, "elapsed_time": "0:29:56", "remaining_time": "0:25:26", "throughput": 5609.25, "total_tokens": 10078448} +{"current_steps": 20485, "total_steps": 37885, "loss": 0.0009, "lr": 1.0325195532044966e-06, "epoch": 2.7035766134353967, "percentage": 54.07, "elapsed_time": "0:29:57", "remaining_time": "0:25:26", "throughput": 5609.64, "total_tokens": 10081008} +{"current_steps": 20490, "total_steps": 37885, "loss": 0.0001, "lr": 1.032059095308311e-06, "epoch": 2.704236505213145, "percentage": 54.08, "elapsed_time": "0:29:57", "remaining_time": "0:25:25", "throughput": 5609.91, "total_tokens": 10083312} +{"current_steps": 20495, "total_steps": 37885, "loss": 0.0, "lr": 1.0315986306078149e-06, "epoch": 2.7048963969908932, "percentage": 54.1, "elapsed_time": "0:29:57", "remaining_time": "0:25:25", "throughput": 5610.45, "total_tokens": 10086192} +{"current_steps": 20500, "total_steps": 37885, "loss": 0.097, "lr": 1.031138159200738e-06, "epoch": 2.705556288768642, "percentage": 54.11, "elapsed_time": "0:29:58", "remaining_time": "0:25:24", "throughput": 5610.68, "total_tokens": 10088432} +{"current_steps": 20505, "total_steps": 37885, "loss": 0.0813, "lr": 1.0306776811848124e-06, "epoch": 2.7062161805463902, "percentage": 54.12, "elapsed_time": "0:29:58", "remaining_time": "0:25:24", "throughput": 5611.1, "total_tokens": 10091056} +{"current_steps": 20510, "total_steps": 37885, "loss": 0.0367, "lr": 1.030217196657771e-06, "epoch": 2.706876072324139, "percentage": 54.14, "elapsed_time": "0:29:58", "remaining_time": "0:25:23", "throughput": 5611.47, "total_tokens": 10093552} +{"current_steps": 20515, "total_steps": 37885, "loss": 0.0882, "lr": 1.0297567057173486e-06, "epoch": 2.7075359641018872, "percentage": 54.15, "elapsed_time": "0:29:59", "remaining_time": "0:25:23", "throughput": 5611.82, "total_tokens": 10096048} +{"current_steps": 20520, "total_steps": 37885, "loss": 0.0012, "lr": 1.0292962084612808e-06, "epoch": 2.7081958558796355, "percentage": 54.16, "elapsed_time": "0:29:59", "remaining_time": "0:25:22", "throughput": 5612.14, "total_tokens": 10098480} +{"current_steps": 20525, "total_steps": 37885, "loss": 0.0012, "lr": 1.0288357049873051e-06, "epoch": 2.7088557476573842, "percentage": 54.18, "elapsed_time": "0:29:59", "remaining_time": "0:25:22", "throughput": 5612.69, "total_tokens": 10101360} +{"current_steps": 20530, "total_steps": 37885, "loss": 0.0799, "lr": 1.0283751953931595e-06, "epoch": 2.7095156394351325, "percentage": 54.19, "elapsed_time": "0:30:00", "remaining_time": "0:25:21", "throughput": 5613.05, "total_tokens": 10103856} +{"current_steps": 20535, "total_steps": 37885, "loss": 0.1165, "lr": 1.0279146797765845e-06, "epoch": 2.7101755312128812, "percentage": 54.2, "elapsed_time": "0:30:00", "remaining_time": "0:25:21", "throughput": 5613.24, "total_tokens": 10106032} +{"current_steps": 20540, "total_steps": 37885, "loss": 0.1146, "lr": 1.0274541582353204e-06, "epoch": 2.7108354229906295, "percentage": 54.22, "elapsed_time": "0:30:00", "remaining_time": "0:25:20", "throughput": 5613.5, "total_tokens": 10108336} +{"current_steps": 20545, "total_steps": 37885, "loss": 0.0007, "lr": 1.0269936308671106e-06, "epoch": 2.711495314768378, "percentage": 54.23, "elapsed_time": "0:30:01", "remaining_time": "0:25:20", "throughput": 5613.99, "total_tokens": 10111088} +{"current_steps": 20550, "total_steps": 37885, "loss": 0.0595, "lr": 1.0265330977696977e-06, "epoch": 2.7121552065461265, "percentage": 54.24, "elapsed_time": "0:30:01", "remaining_time": "0:25:19", "throughput": 5614.35, "total_tokens": 10113584} +{"current_steps": 20555, "total_steps": 37885, "loss": 0.0, "lr": 1.0260725590408273e-06, "epoch": 2.712815098323875, "percentage": 54.26, "elapsed_time": "0:30:01", "remaining_time": "0:25:19", "throughput": 5614.68, "total_tokens": 10116016} +{"current_steps": 20560, "total_steps": 37885, "loss": 0.0612, "lr": 1.0256120147782445e-06, "epoch": 2.7134749901016235, "percentage": 54.27, "elapsed_time": "0:30:02", "remaining_time": "0:25:18", "throughput": 5615.16, "total_tokens": 10118768} +{"current_steps": 20565, "total_steps": 37885, "loss": 0.0013, "lr": 1.0251514650796975e-06, "epoch": 2.714134881879372, "percentage": 54.28, "elapsed_time": "0:30:02", "remaining_time": "0:25:17", "throughput": 5615.39, "total_tokens": 10121008} +{"current_steps": 20570, "total_steps": 37885, "loss": 0.003, "lr": 1.024690910042934e-06, "epoch": 2.71479477365712, "percentage": 54.3, "elapsed_time": "0:30:02", "remaining_time": "0:25:17", "throughput": 5615.88, "total_tokens": 10123760} +{"current_steps": 20575, "total_steps": 37885, "loss": 0.0534, "lr": 1.0242303497657038e-06, "epoch": 2.715454665434869, "percentage": 54.31, "elapsed_time": "0:30:03", "remaining_time": "0:25:16", "throughput": 5616.18, "total_tokens": 10126128} +{"current_steps": 20580, "total_steps": 37885, "loss": 0.0001, "lr": 1.023769784345757e-06, "epoch": 2.716114557212617, "percentage": 54.32, "elapsed_time": "0:30:03", "remaining_time": "0:25:16", "throughput": 5616.52, "total_tokens": 10128560} +{"current_steps": 20585, "total_steps": 37885, "loss": 0.0412, "lr": 1.0233092138808457e-06, "epoch": 2.716774448990366, "percentage": 54.34, "elapsed_time": "0:30:03", "remaining_time": "0:25:15", "throughput": 5616.84, "total_tokens": 10130992} +{"current_steps": 20590, "total_steps": 37885, "loss": 0.0032, "lr": 1.0228486384687226e-06, "epoch": 2.717434340768114, "percentage": 54.35, "elapsed_time": "0:30:04", "remaining_time": "0:25:15", "throughput": 5617.33, "total_tokens": 10133744} +{"current_steps": 20595, "total_steps": 37885, "loss": 0.0358, "lr": 1.0223880582071413e-06, "epoch": 2.7180942325458624, "percentage": 54.36, "elapsed_time": "0:30:04", "remaining_time": "0:25:14", "throughput": 5617.61, "total_tokens": 10136112} +{"current_steps": 20600, "total_steps": 37885, "loss": 0.0007, "lr": 1.0219274731938574e-06, "epoch": 2.718754124323611, "percentage": 54.38, "elapsed_time": "0:30:04", "remaining_time": "0:25:14", "throughput": 5617.84, "total_tokens": 10138352} +{"current_steps": 20605, "total_steps": 37885, "loss": 0.0695, "lr": 1.0214668835266255e-06, "epoch": 2.7194140161013594, "percentage": 54.39, "elapsed_time": "0:30:04", "remaining_time": "0:25:13", "throughput": 5618.14, "total_tokens": 10140720} +{"current_steps": 20610, "total_steps": 37885, "loss": 0.0, "lr": 1.021006289303203e-06, "epoch": 2.720073907879108, "percentage": 54.4, "elapsed_time": "0:30:05", "remaining_time": "0:25:13", "throughput": 5618.39, "total_tokens": 10143024} +{"current_steps": 20615, "total_steps": 37885, "loss": 0.0383, "lr": 1.020545690621348e-06, "epoch": 2.7207337996568564, "percentage": 54.41, "elapsed_time": "0:30:05", "remaining_time": "0:25:12", "throughput": 5618.72, "total_tokens": 10145456} +{"current_steps": 20620, "total_steps": 37885, "loss": 0.0006, "lr": 1.0200850875788187e-06, "epoch": 2.7213936914346046, "percentage": 54.43, "elapsed_time": "0:30:05", "remaining_time": "0:25:12", "throughput": 5618.81, "total_tokens": 10147440} +{"current_steps": 20625, "total_steps": 37885, "loss": 0.0003, "lr": 1.0196244802733752e-06, "epoch": 2.722053583212353, "percentage": 54.44, "elapsed_time": "0:30:06", "remaining_time": "0:25:11", "throughput": 5619.1, "total_tokens": 10149808} +{"current_steps": 20630, "total_steps": 37885, "loss": 0.0002, "lr": 1.0191638688027777e-06, "epoch": 2.7227134749901016, "percentage": 54.45, "elapsed_time": "0:30:06", "remaining_time": "0:25:11", "throughput": 5619.43, "total_tokens": 10152240} +{"current_steps": 20635, "total_steps": 37885, "loss": 0.0, "lr": 1.0187032532647881e-06, "epoch": 2.72337336676785, "percentage": 54.47, "elapsed_time": "0:30:06", "remaining_time": "0:25:10", "throughput": 5619.82, "total_tokens": 10154800} +{"current_steps": 20640, "total_steps": 37885, "loss": 0.0, "lr": 1.018242633757168e-06, "epoch": 2.7240332585455986, "percentage": 54.48, "elapsed_time": "0:30:07", "remaining_time": "0:25:10", "throughput": 5620.08, "total_tokens": 10157104} +{"current_steps": 20645, "total_steps": 37885, "loss": 0.1595, "lr": 1.0177820103776814e-06, "epoch": 2.724693150323347, "percentage": 54.49, "elapsed_time": "0:30:07", "remaining_time": "0:25:09", "throughput": 5620.5, "total_tokens": 10159728} +{"current_steps": 20650, "total_steps": 37885, "loss": 0.0002, "lr": 1.0173213832240918e-06, "epoch": 2.725353042101095, "percentage": 54.51, "elapsed_time": "0:30:07", "remaining_time": "0:25:08", "throughput": 5620.88, "total_tokens": 10162288} +{"current_steps": 20655, "total_steps": 37885, "loss": 0.0004, "lr": 1.0168607523941637e-06, "epoch": 2.726012933878844, "percentage": 54.52, "elapsed_time": "0:30:08", "remaining_time": "0:25:08", "throughput": 5621.24, "total_tokens": 10164784} +{"current_steps": 20660, "total_steps": 37885, "loss": 0.0767, "lr": 1.0164001179856635e-06, "epoch": 2.726672825656592, "percentage": 54.53, "elapsed_time": "0:30:08", "remaining_time": "0:25:07", "throughput": 5621.64, "total_tokens": 10167344} +{"current_steps": 20665, "total_steps": 37885, "loss": 0.0, "lr": 1.0159394800963565e-06, "epoch": 2.727332717434341, "percentage": 54.55, "elapsed_time": "0:30:08", "remaining_time": "0:25:07", "throughput": 5622.06, "total_tokens": 10169968} +{"current_steps": 20670, "total_steps": 37885, "loss": 0.086, "lr": 1.0154788388240105e-06, "epoch": 2.727992609212089, "percentage": 54.56, "elapsed_time": "0:30:09", "remaining_time": "0:25:06", "throughput": 5622.38, "total_tokens": 10172400} +{"current_steps": 20675, "total_steps": 37885, "loss": 0.0003, "lr": 1.015018194266393e-06, "epoch": 2.7286525009898375, "percentage": 54.57, "elapsed_time": "0:30:09", "remaining_time": "0:25:06", "throughput": 5622.67, "total_tokens": 10174768} +{"current_steps": 20680, "total_steps": 37885, "loss": 0.0002, "lr": 1.0145575465212727e-06, "epoch": 2.729312392767586, "percentage": 54.59, "elapsed_time": "0:30:09", "remaining_time": "0:25:05", "throughput": 5622.97, "total_tokens": 10177136} +{"current_steps": 20685, "total_steps": 37885, "loss": 0.0355, "lr": 1.0140968956864186e-06, "epoch": 2.7299722845453345, "percentage": 54.6, "elapsed_time": "0:30:10", "remaining_time": "0:25:05", "throughput": 5623.16, "total_tokens": 10179312} +{"current_steps": 20690, "total_steps": 37885, "loss": 0.0001, "lr": 1.0136362418596004e-06, "epoch": 2.730632176323083, "percentage": 54.61, "elapsed_time": "0:30:10", "remaining_time": "0:25:04", "throughput": 5623.55, "total_tokens": 10181872} +{"current_steps": 20695, "total_steps": 37885, "loss": 0.0874, "lr": 1.0131755851385883e-06, "epoch": 2.7312920681008315, "percentage": 54.63, "elapsed_time": "0:30:10", "remaining_time": "0:25:04", "throughput": 5623.85, "total_tokens": 10184240} +{"current_steps": 20700, "total_steps": 37885, "loss": 0.0445, "lr": 1.012714925621154e-06, "epoch": 2.7319519598785797, "percentage": 54.64, "elapsed_time": "0:30:11", "remaining_time": "0:25:03", "throughput": 5624.1, "total_tokens": 10186544} +{"current_steps": 20705, "total_steps": 37885, "loss": 0.0517, "lr": 1.012254263405069e-06, "epoch": 2.7326118516563285, "percentage": 54.65, "elapsed_time": "0:30:11", "remaining_time": "0:25:03", "throughput": 5624.59, "total_tokens": 10189296} +{"current_steps": 20710, "total_steps": 37885, "loss": 0.08, "lr": 1.0117935985881048e-06, "epoch": 2.7332717434340768, "percentage": 54.67, "elapsed_time": "0:30:11", "remaining_time": "0:25:02", "throughput": 5625.03, "total_tokens": 10191984} +{"current_steps": 20715, "total_steps": 37885, "loss": 0.0001, "lr": 1.0113329312680352e-06, "epoch": 2.7339316352118255, "percentage": 54.68, "elapsed_time": "0:30:12", "remaining_time": "0:25:02", "throughput": 5625.44, "total_tokens": 10194608} +{"current_steps": 20720, "total_steps": 37885, "loss": 0.0008, "lr": 1.0108722615426326e-06, "epoch": 2.7345915269895738, "percentage": 54.69, "elapsed_time": "0:30:12", "remaining_time": "0:25:01", "throughput": 5625.8, "total_tokens": 10197104} +{"current_steps": 20725, "total_steps": 37885, "loss": 0.0003, "lr": 1.0104115895096715e-06, "epoch": 2.735251418767322, "percentage": 54.71, "elapsed_time": "0:30:12", "remaining_time": "0:25:01", "throughput": 5626.12, "total_tokens": 10199536} +{"current_steps": 20730, "total_steps": 37885, "loss": 0.0002, "lr": 1.0099509152669257e-06, "epoch": 2.7359113105450708, "percentage": 54.72, "elapsed_time": "0:30:13", "remaining_time": "0:25:00", "throughput": 5626.51, "total_tokens": 10202096} +{"current_steps": 20735, "total_steps": 37885, "loss": 0.0002, "lr": 1.0094902389121702e-06, "epoch": 2.736571202322819, "percentage": 54.73, "elapsed_time": "0:30:13", "remaining_time": "0:24:59", "throughput": 5626.81, "total_tokens": 10204464} +{"current_steps": 20740, "total_steps": 37885, "loss": 0.0001, "lr": 1.0090295605431805e-06, "epoch": 2.7372310941005678, "percentage": 54.74, "elapsed_time": "0:30:13", "remaining_time": "0:24:59", "throughput": 5627.2, "total_tokens": 10207024} +{"current_steps": 20745, "total_steps": 37885, "loss": 0.0, "lr": 1.0085688802577315e-06, "epoch": 2.737890985878316, "percentage": 54.76, "elapsed_time": "0:30:14", "remaining_time": "0:24:58", "throughput": 5627.65, "total_tokens": 10209712} +{"current_steps": 20750, "total_steps": 37885, "loss": 0.0011, "lr": 1.0081081981536001e-06, "epoch": 2.7385508776560643, "percentage": 54.77, "elapsed_time": "0:30:14", "remaining_time": "0:24:58", "throughput": 5627.98, "total_tokens": 10212144} +{"current_steps": 20755, "total_steps": 37885, "loss": 0.0938, "lr": 1.0076475143285623e-06, "epoch": 2.7392107694338126, "percentage": 54.78, "elapsed_time": "0:30:14", "remaining_time": "0:24:57", "throughput": 5628.43, "total_tokens": 10214832} +{"current_steps": 20760, "total_steps": 37885, "loss": 0.0229, "lr": 1.0071868288803948e-06, "epoch": 2.7398706612115613, "percentage": 54.8, "elapsed_time": "0:30:15", "remaining_time": "0:24:57", "throughput": 5628.78, "total_tokens": 10217328} +{"current_steps": 20765, "total_steps": 37885, "loss": 0.0003, "lr": 1.006726141906875e-06, "epoch": 2.7405305529893096, "percentage": 54.81, "elapsed_time": "0:30:15", "remaining_time": "0:24:56", "throughput": 5629.08, "total_tokens": 10219696} +{"current_steps": 20770, "total_steps": 37885, "loss": 0.0504, "lr": 1.0062654535057805e-06, "epoch": 2.7411904447670583, "percentage": 54.82, "elapsed_time": "0:30:15", "remaining_time": "0:24:56", "throughput": 5629.36, "total_tokens": 10222064} +{"current_steps": 20775, "total_steps": 37885, "loss": 0.0955, "lr": 1.0058047637748886e-06, "epoch": 2.7418503365448066, "percentage": 54.84, "elapsed_time": "0:30:16", "remaining_time": "0:24:55", "throughput": 5629.81, "total_tokens": 10224752} +{"current_steps": 20780, "total_steps": 37885, "loss": 0.0611, "lr": 1.0053440728119778e-06, "epoch": 2.742510228322555, "percentage": 54.85, "elapsed_time": "0:30:16", "remaining_time": "0:24:55", "throughput": 5630.17, "total_tokens": 10227248} +{"current_steps": 20785, "total_steps": 37885, "loss": 0.0001, "lr": 1.0048833807148263e-06, "epoch": 2.7431701201003036, "percentage": 54.86, "elapsed_time": "0:30:16", "remaining_time": "0:24:54", "throughput": 5630.53, "total_tokens": 10229744} +{"current_steps": 20790, "total_steps": 37885, "loss": 0.0001, "lr": 1.004422687581212e-06, "epoch": 2.743830011878052, "percentage": 54.88, "elapsed_time": "0:30:17", "remaining_time": "0:24:54", "throughput": 5630.85, "total_tokens": 10232176} +{"current_steps": 20795, "total_steps": 37885, "loss": 0.179, "lr": 1.0039619935089149e-06, "epoch": 2.7444899036558006, "percentage": 54.89, "elapsed_time": "0:30:17", "remaining_time": "0:24:53", "throughput": 5631.17, "total_tokens": 10234608} +{"current_steps": 20800, "total_steps": 37885, "loss": 0.0004, "lr": 1.0035012985957132e-06, "epoch": 2.745149795433549, "percentage": 54.9, "elapsed_time": "0:30:17", "remaining_time": "0:24:53", "throughput": 5631.5, "total_tokens": 10237040} +{"current_steps": 20805, "total_steps": 37885, "loss": 0.0003, "lr": 1.0030406029393863e-06, "epoch": 2.745809687211297, "percentage": 54.92, "elapsed_time": "0:30:18", "remaining_time": "0:24:52", "throughput": 5631.79, "total_tokens": 10239408} +{"current_steps": 20810, "total_steps": 37885, "loss": 0.1464, "lr": 1.0025799066377134e-06, "epoch": 2.746469578989046, "percentage": 54.93, "elapsed_time": "0:30:18", "remaining_time": "0:24:52", "throughput": 5632.11, "total_tokens": 10241840} +{"current_steps": 20815, "total_steps": 37885, "loss": 0.0002, "lr": 1.0021192097884738e-06, "epoch": 2.747129470766794, "percentage": 54.94, "elapsed_time": "0:30:18", "remaining_time": "0:24:51", "throughput": 5632.43, "total_tokens": 10244272} +{"current_steps": 20820, "total_steps": 37885, "loss": 0.0029, "lr": 1.0016585124894478e-06, "epoch": 2.747789362544543, "percentage": 54.96, "elapsed_time": "0:30:19", "remaining_time": "0:24:51", "throughput": 5632.87, "total_tokens": 10246960} +{"current_steps": 20825, "total_steps": 37885, "loss": 0.0008, "lr": 1.0011978148384137e-06, "epoch": 2.748449254322291, "percentage": 54.97, "elapsed_time": "0:30:19", "remaining_time": "0:24:50", "throughput": 5633.36, "total_tokens": 10249712} +{"current_steps": 20830, "total_steps": 37885, "loss": 0.0627, "lr": 1.0007371169331527e-06, "epoch": 2.7491091461000394, "percentage": 54.98, "elapsed_time": "0:30:19", "remaining_time": "0:24:49", "throughput": 5633.81, "total_tokens": 10252400} +{"current_steps": 20835, "total_steps": 37885, "loss": 0.0152, "lr": 1.0002764188714438e-06, "epoch": 2.749769037877788, "percentage": 55.0, "elapsed_time": "0:30:20", "remaining_time": "0:24:49", "throughput": 5634.22, "total_tokens": 10255024} +{"current_steps": 20840, "total_steps": 37885, "loss": 0.0001, "lr": 9.99815720751067e-07, "epoch": 2.7504289296555364, "percentage": 55.01, "elapsed_time": "0:30:20", "remaining_time": "0:24:48", "throughput": 5634.52, "total_tokens": 10257392} +{"current_steps": 20845, "total_steps": 37885, "loss": 0.0429, "lr": 9.993550226698021e-07, "epoch": 2.751088821433285, "percentage": 55.02, "elapsed_time": "0:30:20", "remaining_time": "0:24:48", "throughput": 5634.68, "total_tokens": 10259504} +{"current_steps": 20845, "total_steps": 37885, "eval_loss": 0.16027498245239258, "epoch": 2.751088821433285, "percentage": 55.02, "elapsed_time": "0:30:28", "remaining_time": "0:24:54", "throughput": 5610.55, "total_tokens": 10259504} +{"current_steps": 20850, "total_steps": 37885, "loss": 0.0397, "lr": 9.988943247254293e-07, "epoch": 2.7517487132110334, "percentage": 55.03, "elapsed_time": "0:31:03", "remaining_time": "0:25:22", "throughput": 5508.12, "total_tokens": 10261808} +{"current_steps": 20855, "total_steps": 37885, "loss": 0.038, "lr": 9.984336270157277e-07, "epoch": 2.7524086049887817, "percentage": 55.05, "elapsed_time": "0:31:03", "remaining_time": "0:25:21", "throughput": 5508.45, "total_tokens": 10264240} +{"current_steps": 20860, "total_steps": 37885, "loss": 0.0004, "lr": 9.979729296384775e-07, "epoch": 2.7530684967665304, "percentage": 55.06, "elapsed_time": "0:31:03", "remaining_time": "0:25:21", "throughput": 5508.81, "total_tokens": 10266736} +{"current_steps": 20865, "total_steps": 37885, "loss": 0.2016, "lr": 9.97512232691458e-07, "epoch": 2.7537283885442787, "percentage": 55.07, "elapsed_time": "0:31:04", "remaining_time": "0:25:20", "throughput": 5509.29, "total_tokens": 10269488} +{"current_steps": 20870, "total_steps": 37885, "loss": 0.0143, "lr": 9.970515362724497e-07, "epoch": 2.7543882803220274, "percentage": 55.09, "elapsed_time": "0:31:04", "remaining_time": "0:25:19", "throughput": 5509.61, "total_tokens": 10271920} +{"current_steps": 20875, "total_steps": 37885, "loss": 0.1161, "lr": 9.965908404792313e-07, "epoch": 2.7550481720997757, "percentage": 55.1, "elapsed_time": "0:31:04", "remaining_time": "0:25:19", "throughput": 5510.08, "total_tokens": 10274672} +{"current_steps": 20880, "total_steps": 37885, "loss": 0.0524, "lr": 9.96130145409582e-07, "epoch": 2.755708063877524, "percentage": 55.11, "elapsed_time": "0:31:05", "remaining_time": "0:25:18", "throughput": 5510.55, "total_tokens": 10277424} +{"current_steps": 20885, "total_steps": 37885, "loss": 0.0002, "lr": 9.956694511612817e-07, "epoch": 2.7563679556552723, "percentage": 55.13, "elapsed_time": "0:31:05", "remaining_time": "0:25:18", "throughput": 5510.89, "total_tokens": 10279920} +{"current_steps": 20890, "total_steps": 37885, "loss": 0.0058, "lr": 9.952087578321086e-07, "epoch": 2.757027847433021, "percentage": 55.14, "elapsed_time": "0:31:05", "remaining_time": "0:25:17", "throughput": 5511.25, "total_tokens": 10282480} +{"current_steps": 20895, "total_steps": 37885, "loss": 0.1002, "lr": 9.947480655198423e-07, "epoch": 2.7576877392107697, "percentage": 55.15, "elapsed_time": "0:31:06", "remaining_time": "0:25:17", "throughput": 5511.59, "total_tokens": 10284976} +{"current_steps": 20900, "total_steps": 37885, "loss": 0.0384, "lr": 9.94287374322261e-07, "epoch": 2.758347630988518, "percentage": 55.17, "elapsed_time": "0:31:06", "remaining_time": "0:25:16", "throughput": 5511.85, "total_tokens": 10287344} +{"current_steps": 20905, "total_steps": 37885, "loss": 0.0002, "lr": 9.93826684337143e-07, "epoch": 2.7590075227662663, "percentage": 55.18, "elapsed_time": "0:31:06", "remaining_time": "0:25:16", "throughput": 5512.07, "total_tokens": 10289648} +{"current_steps": 20910, "total_steps": 37885, "loss": 0.0342, "lr": 9.933659956622668e-07, "epoch": 2.7596674145440145, "percentage": 55.19, "elapsed_time": "0:31:07", "remaining_time": "0:25:15", "throughput": 5512.29, "total_tokens": 10291952} +{"current_steps": 20915, "total_steps": 37885, "loss": 0.0798, "lr": 9.929053083954096e-07, "epoch": 2.7603273063217633, "percentage": 55.21, "elapsed_time": "0:31:07", "remaining_time": "0:25:15", "throughput": 5512.74, "total_tokens": 10294704} +{"current_steps": 20920, "total_steps": 37885, "loss": 0.0007, "lr": 9.924446226343496e-07, "epoch": 2.7609871980995115, "percentage": 55.22, "elapsed_time": "0:31:07", "remaining_time": "0:25:14", "throughput": 5513.1, "total_tokens": 10297264} +{"current_steps": 20925, "total_steps": 37885, "loss": 0.077, "lr": 9.91983938476864e-07, "epoch": 2.7616470898772603, "percentage": 55.23, "elapsed_time": "0:31:08", "remaining_time": "0:25:14", "throughput": 5513.2, "total_tokens": 10299312} +{"current_steps": 20930, "total_steps": 37885, "loss": 0.0904, "lr": 9.915232560207288e-07, "epoch": 2.7623069816550085, "percentage": 55.25, "elapsed_time": "0:31:08", "remaining_time": "0:25:13", "throughput": 5513.42, "total_tokens": 10301616} +{"current_steps": 20935, "total_steps": 37885, "loss": 0.0017, "lr": 9.910625753637215e-07, "epoch": 2.762966873432757, "percentage": 55.26, "elapsed_time": "0:31:08", "remaining_time": "0:25:13", "throughput": 5513.71, "total_tokens": 10303984} +{"current_steps": 20940, "total_steps": 37885, "loss": 0.075, "lr": 9.906018966036177e-07, "epoch": 2.7636267652105055, "percentage": 55.27, "elapsed_time": "0:31:09", "remaining_time": "0:25:12", "throughput": 5514.13, "total_tokens": 10306608} +{"current_steps": 20945, "total_steps": 37885, "loss": 0.0004, "lr": 9.901412198381935e-07, "epoch": 2.764286656988254, "percentage": 55.29, "elapsed_time": "0:31:09", "remaining_time": "0:25:11", "throughput": 5514.46, "total_tokens": 10309040} +{"current_steps": 20950, "total_steps": 37885, "loss": 0.0003, "lr": 9.89680545165224e-07, "epoch": 2.7649465487660025, "percentage": 55.3, "elapsed_time": "0:31:09", "remaining_time": "0:25:11", "throughput": 5514.68, "total_tokens": 10311280} +{"current_steps": 20955, "total_steps": 37885, "loss": 0.0475, "lr": 9.892198726824835e-07, "epoch": 2.765606440543751, "percentage": 55.31, "elapsed_time": "0:31:10", "remaining_time": "0:25:10", "throughput": 5515.03, "total_tokens": 10313776} +{"current_steps": 20960, "total_steps": 37885, "loss": 0.0412, "lr": 9.887592024877478e-07, "epoch": 2.766266332321499, "percentage": 55.33, "elapsed_time": "0:31:10", "remaining_time": "0:25:10", "throughput": 5515.45, "total_tokens": 10316400} +{"current_steps": 20965, "total_steps": 37885, "loss": 0.0002, "lr": 9.882985346787892e-07, "epoch": 2.766926224099248, "percentage": 55.34, "elapsed_time": "0:31:10", "remaining_time": "0:25:09", "throughput": 5515.86, "total_tokens": 10319024} +{"current_steps": 20970, "total_steps": 37885, "loss": 0.0068, "lr": 9.878378693533825e-07, "epoch": 2.767586115876996, "percentage": 55.35, "elapsed_time": "0:31:11", "remaining_time": "0:25:09", "throughput": 5516.25, "total_tokens": 10321584} +{"current_steps": 20975, "total_steps": 37885, "loss": 0.0035, "lr": 9.873772066092998e-07, "epoch": 2.768246007654745, "percentage": 55.36, "elapsed_time": "0:31:11", "remaining_time": "0:25:08", "throughput": 5516.55, "total_tokens": 10323952} +{"current_steps": 20980, "total_steps": 37885, "loss": 0.0556, "lr": 9.869165465443132e-07, "epoch": 2.768905899432493, "percentage": 55.38, "elapsed_time": "0:31:11", "remaining_time": "0:25:08", "throughput": 5516.87, "total_tokens": 10326384} +{"current_steps": 20985, "total_steps": 37885, "loss": 0.0006, "lr": 9.864558892561955e-07, "epoch": 2.7695657912102414, "percentage": 55.39, "elapsed_time": "0:31:12", "remaining_time": "0:25:07", "throughput": 5517.13, "total_tokens": 10328688} +{"current_steps": 20990, "total_steps": 37885, "loss": 0.0582, "lr": 9.859952348427167e-07, "epoch": 2.77022568298799, "percentage": 55.4, "elapsed_time": "0:31:12", "remaining_time": "0:25:07", "throughput": 5517.56, "total_tokens": 10331312} +{"current_steps": 20995, "total_steps": 37885, "loss": 0.0782, "lr": 9.855345834016481e-07, "epoch": 2.7708855747657384, "percentage": 55.42, "elapsed_time": "0:31:12", "remaining_time": "0:25:06", "throughput": 5517.84, "total_tokens": 10333680} +{"current_steps": 21000, "total_steps": 37885, "loss": 0.0438, "lr": 9.850739350307595e-07, "epoch": 2.771545466543487, "percentage": 55.43, "elapsed_time": "0:31:13", "remaining_time": "0:25:06", "throughput": 5518.23, "total_tokens": 10336240} +{"current_steps": 21005, "total_steps": 37885, "loss": 0.0004, "lr": 9.846132898278198e-07, "epoch": 2.7722053583212354, "percentage": 55.44, "elapsed_time": "0:31:13", "remaining_time": "0:25:05", "throughput": 5518.52, "total_tokens": 10338608} +{"current_steps": 21010, "total_steps": 37885, "loss": 0.1489, "lr": 9.84152647890598e-07, "epoch": 2.7728652500989837, "percentage": 55.46, "elapsed_time": "0:31:13", "remaining_time": "0:25:04", "throughput": 5518.95, "total_tokens": 10341296} +{"current_steps": 21015, "total_steps": 37885, "loss": 0.0001, "lr": 9.83692009316862e-07, "epoch": 2.7735251418767324, "percentage": 55.47, "elapsed_time": "0:31:14", "remaining_time": "0:25:04", "throughput": 5519.43, "total_tokens": 10344048} +{"current_steps": 21020, "total_steps": 37885, "loss": 0.0002, "lr": 9.832313742043792e-07, "epoch": 2.7741850336544807, "percentage": 55.48, "elapsed_time": "0:31:14", "remaining_time": "0:25:03", "throughput": 5519.69, "total_tokens": 10346352} +{"current_steps": 21025, "total_steps": 37885, "loss": 0.0472, "lr": 9.827707426509155e-07, "epoch": 2.7748449254322294, "percentage": 55.5, "elapsed_time": "0:31:14", "remaining_time": "0:25:03", "throughput": 5520.01, "total_tokens": 10348784} +{"current_steps": 21030, "total_steps": 37885, "loss": 0.0002, "lr": 9.823101147542368e-07, "epoch": 2.7755048172099777, "percentage": 55.51, "elapsed_time": "0:31:15", "remaining_time": "0:25:02", "throughput": 5520.42, "total_tokens": 10351344} +{"current_steps": 21035, "total_steps": 37885, "loss": 0.0003, "lr": 9.818494906121084e-07, "epoch": 2.776164708987726, "percentage": 55.52, "elapsed_time": "0:31:15", "remaining_time": "0:25:02", "throughput": 5520.88, "total_tokens": 10354032} +{"current_steps": 21040, "total_steps": 37885, "loss": 0.0003, "lr": 9.813888703222938e-07, "epoch": 2.776824600765474, "percentage": 55.54, "elapsed_time": "0:31:15", "remaining_time": "0:25:01", "throughput": 5521.3, "total_tokens": 10356656} +{"current_steps": 21045, "total_steps": 37885, "loss": 0.0059, "lr": 9.809282539825573e-07, "epoch": 2.777484492543223, "percentage": 55.55, "elapsed_time": "0:31:16", "remaining_time": "0:25:01", "throughput": 5521.72, "total_tokens": 10359280} +{"current_steps": 21050, "total_steps": 37885, "loss": 0.0612, "lr": 9.804676416906605e-07, "epoch": 2.778144384320971, "percentage": 55.56, "elapsed_time": "0:31:16", "remaining_time": "0:25:00", "throughput": 5522.05, "total_tokens": 10361712} +{"current_steps": 21055, "total_steps": 37885, "loss": 0.1814, "lr": 9.800070335443651e-07, "epoch": 2.77880427609872, "percentage": 55.58, "elapsed_time": "0:31:16", "remaining_time": "0:25:00", "throughput": 5522.49, "total_tokens": 10364400} +{"current_steps": 21060, "total_steps": 37885, "loss": 0.024, "lr": 9.795464296414323e-07, "epoch": 2.779464167876468, "percentage": 55.59, "elapsed_time": "0:31:17", "remaining_time": "0:24:59", "throughput": 5522.92, "total_tokens": 10367024} +{"current_steps": 21065, "total_steps": 37885, "loss": 0.0338, "lr": 9.790858300796214e-07, "epoch": 2.7801240596542165, "percentage": 55.6, "elapsed_time": "0:31:17", "remaining_time": "0:24:59", "throughput": 5523.28, "total_tokens": 10369520} +{"current_steps": 21070, "total_steps": 37885, "loss": 0.004, "lr": 9.78625234956692e-07, "epoch": 2.780783951431965, "percentage": 55.62, "elapsed_time": "0:31:17", "remaining_time": "0:24:58", "throughput": 5523.79, "total_tokens": 10372336} +{"current_steps": 21075, "total_steps": 37885, "loss": 0.0019, "lr": 9.781646443704014e-07, "epoch": 2.7814438432097135, "percentage": 55.63, "elapsed_time": "0:31:18", "remaining_time": "0:24:58", "throughput": 5524.25, "total_tokens": 10375024} +{"current_steps": 21080, "total_steps": 37885, "loss": 0.0002, "lr": 9.777040584185072e-07, "epoch": 2.782103734987462, "percentage": 55.64, "elapsed_time": "0:31:18", "remaining_time": "0:24:57", "throughput": 5524.71, "total_tokens": 10377712} +{"current_steps": 21085, "total_steps": 37885, "loss": 0.0001, "lr": 9.772434771987652e-07, "epoch": 2.7827636267652105, "percentage": 55.66, "elapsed_time": "0:31:18", "remaining_time": "0:24:56", "throughput": 5524.94, "total_tokens": 10379952} +{"current_steps": 21090, "total_steps": 37885, "loss": 0.112, "lr": 9.7678290080893e-07, "epoch": 2.7834235185429588, "percentage": 55.67, "elapsed_time": "0:31:19", "remaining_time": "0:24:56", "throughput": 5525.3, "total_tokens": 10382448} +{"current_steps": 21095, "total_steps": 37885, "loss": 0.0793, "lr": 9.76322329346756e-07, "epoch": 2.7840834103207075, "percentage": 55.68, "elapsed_time": "0:31:19", "remaining_time": "0:24:55", "throughput": 5525.53, "total_tokens": 10384688} +{"current_steps": 21100, "total_steps": 37885, "loss": 0.1091, "lr": 9.758617629099961e-07, "epoch": 2.7847433020984558, "percentage": 55.69, "elapsed_time": "0:31:19", "remaining_time": "0:24:55", "throughput": 5525.84, "total_tokens": 10387120} +{"current_steps": 21105, "total_steps": 37885, "loss": 0.0003, "lr": 9.754012015964027e-07, "epoch": 2.7854031938762045, "percentage": 55.71, "elapsed_time": "0:31:20", "remaining_time": "0:24:54", "throughput": 5526.13, "total_tokens": 10389488} +{"current_steps": 21110, "total_steps": 37885, "loss": 0.0003, "lr": 9.749406455037262e-07, "epoch": 2.7860630856539528, "percentage": 55.72, "elapsed_time": "0:31:20", "remaining_time": "0:24:54", "throughput": 5526.53, "total_tokens": 10392048} +{"current_steps": 21115, "total_steps": 37885, "loss": 0.0001, "lr": 9.744800947297154e-07, "epoch": 2.786722977431701, "percentage": 55.73, "elapsed_time": "0:31:20", "remaining_time": "0:24:53", "throughput": 5526.83, "total_tokens": 10394416} +{"current_steps": 21120, "total_steps": 37885, "loss": 0.0831, "lr": 9.740195493721204e-07, "epoch": 2.7873828692094498, "percentage": 55.75, "elapsed_time": "0:31:21", "remaining_time": "0:24:53", "throughput": 5527.17, "total_tokens": 10396912} +{"current_steps": 21125, "total_steps": 37885, "loss": 0.0001, "lr": 9.735590095286874e-07, "epoch": 2.788042760987198, "percentage": 55.76, "elapsed_time": "0:31:21", "remaining_time": "0:24:52", "throughput": 5527.45, "total_tokens": 10399280} +{"current_steps": 21130, "total_steps": 37885, "loss": 0.0007, "lr": 9.730984752971634e-07, "epoch": 2.7887026527649468, "percentage": 55.77, "elapsed_time": "0:31:21", "remaining_time": "0:24:52", "throughput": 5527.9, "total_tokens": 10401968} +{"current_steps": 21135, "total_steps": 37885, "loss": 0.0, "lr": 9.726379467752937e-07, "epoch": 2.789362544542695, "percentage": 55.79, "elapsed_time": "0:31:22", "remaining_time": "0:24:51", "throughput": 5528.46, "total_tokens": 10404912} +{"current_steps": 21140, "total_steps": 37885, "loss": 0.111, "lr": 9.721774240608208e-07, "epoch": 2.7900224363204433, "percentage": 55.8, "elapsed_time": "0:31:22", "remaining_time": "0:24:51", "throughput": 5528.89, "total_tokens": 10407600} +{"current_steps": 21145, "total_steps": 37885, "loss": 0.0037, "lr": 9.71716907251489e-07, "epoch": 2.790682328098192, "percentage": 55.81, "elapsed_time": "0:31:22", "remaining_time": "0:24:50", "throughput": 5529.24, "total_tokens": 10410096} +{"current_steps": 21150, "total_steps": 37885, "loss": 0.0089, "lr": 9.712563964450378e-07, "epoch": 2.7913422198759403, "percentage": 55.83, "elapsed_time": "0:31:23", "remaining_time": "0:24:49", "throughput": 5529.67, "total_tokens": 10412720} +{"current_steps": 21155, "total_steps": 37885, "loss": 0.0001, "lr": 9.707958917392094e-07, "epoch": 2.792002111653689, "percentage": 55.84, "elapsed_time": "0:31:23", "remaining_time": "0:24:49", "throughput": 5529.95, "total_tokens": 10415088} +{"current_steps": 21160, "total_steps": 37885, "loss": 0.0325, "lr": 9.70335393231741e-07, "epoch": 2.7926620034314373, "percentage": 55.85, "elapsed_time": "0:31:23", "remaining_time": "0:24:48", "throughput": 5530.32, "total_tokens": 10417648} +{"current_steps": 21165, "total_steps": 37885, "loss": 0.0001, "lr": 9.698749010203704e-07, "epoch": 2.7933218952091856, "percentage": 55.87, "elapsed_time": "0:31:24", "remaining_time": "0:24:48", "throughput": 5530.61, "total_tokens": 10420016} +{"current_steps": 21170, "total_steps": 37885, "loss": 0.0985, "lr": 9.694144152028342e-07, "epoch": 2.793981786986934, "percentage": 55.88, "elapsed_time": "0:31:24", "remaining_time": "0:24:47", "throughput": 5531.04, "total_tokens": 10422704} +{"current_steps": 21175, "total_steps": 37885, "loss": 0.0004, "lr": 9.689539358768668e-07, "epoch": 2.7946416787646826, "percentage": 55.89, "elapsed_time": "0:31:24", "remaining_time": "0:24:47", "throughput": 5531.17, "total_tokens": 10424752} +{"current_steps": 21180, "total_steps": 37885, "loss": 0.0004, "lr": 9.684934631402016e-07, "epoch": 2.795301570542431, "percentage": 55.91, "elapsed_time": "0:31:25", "remaining_time": "0:24:46", "throughput": 5531.55, "total_tokens": 10427312} +{"current_steps": 21185, "total_steps": 37885, "loss": 0.0, "lr": 9.68032997090571e-07, "epoch": 2.7959614623201796, "percentage": 55.92, "elapsed_time": "0:31:25", "remaining_time": "0:24:46", "throughput": 5531.9, "total_tokens": 10429808} +{"current_steps": 21190, "total_steps": 37885, "loss": 0.0, "lr": 9.675725378257047e-07, "epoch": 2.796621354097928, "percentage": 55.93, "elapsed_time": "0:31:25", "remaining_time": "0:24:45", "throughput": 5532.28, "total_tokens": 10432368} +{"current_steps": 21195, "total_steps": 37885, "loss": 0.0902, "lr": 9.67112085443333e-07, "epoch": 2.797281245875676, "percentage": 55.95, "elapsed_time": "0:31:26", "remaining_time": "0:24:45", "throughput": 5532.52, "total_tokens": 10434672} +{"current_steps": 21200, "total_steps": 37885, "loss": 0.0641, "lr": 9.666516400411826e-07, "epoch": 2.797941137653425, "percentage": 55.96, "elapsed_time": "0:31:26", "remaining_time": "0:24:44", "throughput": 5532.84, "total_tokens": 10437168} +{"current_steps": 21205, "total_steps": 37885, "loss": 0.0, "lr": 9.661912017169803e-07, "epoch": 2.798601029431173, "percentage": 55.97, "elapsed_time": "0:31:26", "remaining_time": "0:24:44", "throughput": 5533.09, "total_tokens": 10439472} +{"current_steps": 21210, "total_steps": 37885, "loss": 0.0, "lr": 9.657307705684507e-07, "epoch": 2.799260921208922, "percentage": 55.99, "elapsed_time": "0:31:27", "remaining_time": "0:24:43", "throughput": 5533.38, "total_tokens": 10441840} +{"current_steps": 21215, "total_steps": 37885, "loss": 0.0975, "lr": 9.652703466933167e-07, "epoch": 2.79992081298667, "percentage": 56.0, "elapsed_time": "0:31:27", "remaining_time": "0:24:43", "throughput": 5533.68, "total_tokens": 10444272} +{"current_steps": 21220, "total_steps": 37885, "loss": 0.0003, "lr": 9.648099301893003e-07, "epoch": 2.8005807047644184, "percentage": 56.01, "elapsed_time": "0:31:27", "remaining_time": "0:24:42", "throughput": 5534.04, "total_tokens": 10446832} +{"current_steps": 21225, "total_steps": 37885, "loss": 0.135, "lr": 9.643495211541212e-07, "epoch": 2.801240596542167, "percentage": 56.02, "elapsed_time": "0:31:28", "remaining_time": "0:24:41", "throughput": 5534.28, "total_tokens": 10449136} +{"current_steps": 21230, "total_steps": 37885, "loss": 0.0473, "lr": 9.63889119685498e-07, "epoch": 2.8019004883199154, "percentage": 56.04, "elapsed_time": "0:31:28", "remaining_time": "0:24:41", "throughput": 5534.69, "total_tokens": 10451760} +{"current_steps": 21235, "total_steps": 37885, "loss": 0.0, "lr": 9.634287258811481e-07, "epoch": 2.802560380097664, "percentage": 56.05, "elapsed_time": "0:31:28", "remaining_time": "0:24:40", "throughput": 5534.9, "total_tokens": 10454000} +{"current_steps": 21240, "total_steps": 37885, "loss": 0.0016, "lr": 9.62968339838786e-07, "epoch": 2.8032202718754125, "percentage": 56.06, "elapsed_time": "0:31:29", "remaining_time": "0:24:40", "throughput": 5535.15, "total_tokens": 10456304} +{"current_steps": 21245, "total_steps": 37885, "loss": 0.0027, "lr": 9.625079616561256e-07, "epoch": 2.8038801636531607, "percentage": 56.08, "elapsed_time": "0:31:29", "remaining_time": "0:24:39", "throughput": 5535.5, "total_tokens": 10458800} +{"current_steps": 21250, "total_steps": 37885, "loss": 0.0001, "lr": 9.620475914308787e-07, "epoch": 2.8045400554309095, "percentage": 56.09, "elapsed_time": "0:31:29", "remaining_time": "0:24:39", "throughput": 5535.8, "total_tokens": 10461232} +{"current_steps": 21255, "total_steps": 37885, "loss": 0.2071, "lr": 9.615872292607559e-07, "epoch": 2.8051999472086577, "percentage": 56.1, "elapsed_time": "0:31:30", "remaining_time": "0:24:38", "throughput": 5536.04, "total_tokens": 10463536} +{"current_steps": 21260, "total_steps": 37885, "loss": 0.2321, "lr": 9.611268752434658e-07, "epoch": 2.8058598389864065, "percentage": 56.12, "elapsed_time": "0:31:30", "remaining_time": "0:24:38", "throughput": 5536.32, "total_tokens": 10465904} +{"current_steps": 21265, "total_steps": 37885, "loss": 0.0004, "lr": 9.606665294767144e-07, "epoch": 2.8065197307641547, "percentage": 56.13, "elapsed_time": "0:31:30", "remaining_time": "0:24:37", "throughput": 5536.61, "total_tokens": 10468272} +{"current_steps": 21270, "total_steps": 37885, "loss": 0.0368, "lr": 9.602061920582076e-07, "epoch": 2.807179622541903, "percentage": 56.14, "elapsed_time": "0:31:31", "remaining_time": "0:24:37", "throughput": 5536.87, "total_tokens": 10470576} +{"current_steps": 21275, "total_steps": 37885, "loss": 0.0004, "lr": 9.59745863085648e-07, "epoch": 2.8078395143196517, "percentage": 56.16, "elapsed_time": "0:31:31", "remaining_time": "0:24:36", "throughput": 5537.15, "total_tokens": 10472944} +{"current_steps": 21280, "total_steps": 37885, "loss": 0.0004, "lr": 9.59285542656738e-07, "epoch": 2.8084994060974, "percentage": 56.17, "elapsed_time": "0:31:31", "remaining_time": "0:24:36", "throughput": 5537.4, "total_tokens": 10475248} +{"current_steps": 21285, "total_steps": 37885, "loss": 0.0534, "lr": 9.588252308691768e-07, "epoch": 2.8091592978751487, "percentage": 56.18, "elapsed_time": "0:31:32", "remaining_time": "0:24:35", "throughput": 5537.77, "total_tokens": 10477808} +{"current_steps": 21290, "total_steps": 37885, "loss": 0.0402, "lr": 9.583649278206616e-07, "epoch": 2.809819189652897, "percentage": 56.2, "elapsed_time": "0:31:32", "remaining_time": "0:24:35", "throughput": 5538.05, "total_tokens": 10480176} +{"current_steps": 21295, "total_steps": 37885, "loss": 0.0887, "lr": 9.579046336088894e-07, "epoch": 2.8104790814306453, "percentage": 56.21, "elapsed_time": "0:31:32", "remaining_time": "0:24:34", "throughput": 5538.24, "total_tokens": 10482352} +{"current_steps": 21300, "total_steps": 37885, "loss": 0.2153, "lr": 9.574443483315533e-07, "epoch": 2.8111389732083936, "percentage": 56.22, "elapsed_time": "0:31:33", "remaining_time": "0:24:34", "throughput": 5538.6, "total_tokens": 10484912} +{"current_steps": 21305, "total_steps": 37885, "loss": 0.2879, "lr": 9.569840720863469e-07, "epoch": 2.8117988649861423, "percentage": 56.24, "elapsed_time": "0:31:33", "remaining_time": "0:24:33", "throughput": 5538.82, "total_tokens": 10487216} +{"current_steps": 21310, "total_steps": 37885, "loss": 0.0255, "lr": 9.565238049709596e-07, "epoch": 2.8124587567638906, "percentage": 56.25, "elapsed_time": "0:31:33", "remaining_time": "0:24:32", "throughput": 5539.21, "total_tokens": 10489840} +{"current_steps": 21315, "total_steps": 37885, "loss": 0.0002, "lr": 9.560635470830794e-07, "epoch": 2.8131186485416393, "percentage": 56.26, "elapsed_time": "0:31:34", "remaining_time": "0:24:32", "throughput": 5539.53, "total_tokens": 10492272} +{"current_steps": 21320, "total_steps": 37885, "loss": 0.0089, "lr": 9.556032985203934e-07, "epoch": 2.8137785403193876, "percentage": 56.28, "elapsed_time": "0:31:34", "remaining_time": "0:24:31", "throughput": 5539.85, "total_tokens": 10494768} +{"current_steps": 21325, "total_steps": 37885, "loss": 0.0758, "lr": 9.551430593805854e-07, "epoch": 2.814438432097136, "percentage": 56.29, "elapsed_time": "0:31:34", "remaining_time": "0:24:31", "throughput": 5540.22, "total_tokens": 10497328} +{"current_steps": 21330, "total_steps": 37885, "loss": 0.0628, "lr": 9.546828297613389e-07, "epoch": 2.8150983238748846, "percentage": 56.3, "elapsed_time": "0:31:35", "remaining_time": "0:24:30", "throughput": 5540.64, "total_tokens": 10500016} +{"current_steps": 21335, "total_steps": 37885, "loss": 0.1013, "lr": 9.542226097603335e-07, "epoch": 2.815758215652633, "percentage": 56.32, "elapsed_time": "0:31:35", "remaining_time": "0:24:30", "throughput": 5540.95, "total_tokens": 10502448} +{"current_steps": 21340, "total_steps": 37885, "loss": 0.0005, "lr": 9.537623994752473e-07, "epoch": 2.8164181074303816, "percentage": 56.33, "elapsed_time": "0:31:35", "remaining_time": "0:24:29", "throughput": 5541.31, "total_tokens": 10504944} +{"current_steps": 21345, "total_steps": 37885, "loss": 0.0752, "lr": 9.533021990037572e-07, "epoch": 2.81707799920813, "percentage": 56.34, "elapsed_time": "0:31:36", "remaining_time": "0:24:29", "throughput": 5541.62, "total_tokens": 10507440} +{"current_steps": 21350, "total_steps": 37885, "loss": 0.0179, "lr": 9.52842008443537e-07, "epoch": 2.817737890985878, "percentage": 56.35, "elapsed_time": "0:31:36", "remaining_time": "0:24:28", "throughput": 5541.85, "total_tokens": 10509680} +{"current_steps": 21355, "total_steps": 37885, "loss": 0.002, "lr": 9.523818278922593e-07, "epoch": 2.818397782763627, "percentage": 56.37, "elapsed_time": "0:31:36", "remaining_time": "0:24:28", "throughput": 5542.15, "total_tokens": 10512112} +{"current_steps": 21360, "total_steps": 37885, "loss": 0.0933, "lr": 9.519216574475937e-07, "epoch": 2.819057674541375, "percentage": 56.38, "elapsed_time": "0:31:37", "remaining_time": "0:24:27", "throughput": 5542.44, "total_tokens": 10514480} +{"current_steps": 21365, "total_steps": 37885, "loss": 0.0934, "lr": 9.514614972072082e-07, "epoch": 2.819717566319124, "percentage": 56.39, "elapsed_time": "0:31:37", "remaining_time": "0:24:27", "throughput": 5542.81, "total_tokens": 10517040} +{"current_steps": 21370, "total_steps": 37885, "loss": 0.0255, "lr": 9.510013472687683e-07, "epoch": 2.820377458096872, "percentage": 56.41, "elapsed_time": "0:31:37", "remaining_time": "0:24:26", "throughput": 5543.19, "total_tokens": 10519600} +{"current_steps": 21375, "total_steps": 37885, "loss": 0.0002, "lr": 9.505412077299377e-07, "epoch": 2.8210373498746204, "percentage": 56.42, "elapsed_time": "0:31:38", "remaining_time": "0:24:26", "throughput": 5543.62, "total_tokens": 10522288} +{"current_steps": 21380, "total_steps": 37885, "loss": 0.0009, "lr": 9.500810786883776e-07, "epoch": 2.821697241652369, "percentage": 56.43, "elapsed_time": "0:31:38", "remaining_time": "0:24:25", "throughput": 5544.04, "total_tokens": 10524976} +{"current_steps": 21385, "total_steps": 37885, "loss": 0.034, "lr": 9.496209602417472e-07, "epoch": 2.8223571334301174, "percentage": 56.45, "elapsed_time": "0:31:38", "remaining_time": "0:24:25", "throughput": 5544.45, "total_tokens": 10527600} +{"current_steps": 21390, "total_steps": 37885, "loss": 0.0767, "lr": 9.49160852487703e-07, "epoch": 2.823017025207866, "percentage": 56.46, "elapsed_time": "0:31:39", "remaining_time": "0:24:24", "throughput": 5544.72, "total_tokens": 10529968} +{"current_steps": 21395, "total_steps": 37885, "loss": 0.076, "lr": 9.487007555238997e-07, "epoch": 2.8236769169856144, "percentage": 56.47, "elapsed_time": "0:31:39", "remaining_time": "0:24:23", "throughput": 5544.95, "total_tokens": 10532272} +{"current_steps": 21400, "total_steps": 37885, "loss": 0.0648, "lr": 9.482406694479895e-07, "epoch": 2.8243368087633627, "percentage": 56.49, "elapsed_time": "0:31:39", "remaining_time": "0:24:23", "throughput": 5545.48, "total_tokens": 10535152} +{"current_steps": 21405, "total_steps": 37885, "loss": 0.1232, "lr": 9.477805943576226e-07, "epoch": 2.8249967005411114, "percentage": 56.5, "elapsed_time": "0:31:40", "remaining_time": "0:24:22", "throughput": 5545.84, "total_tokens": 10537712} +{"current_steps": 21410, "total_steps": 37885, "loss": 0.0002, "lr": 9.473205303504463e-07, "epoch": 2.8256565923188597, "percentage": 56.51, "elapsed_time": "0:31:40", "remaining_time": "0:24:22", "throughput": 5546.07, "total_tokens": 10540016} +{"current_steps": 21415, "total_steps": 37885, "loss": 0.1321, "lr": 9.468604775241061e-07, "epoch": 2.8263164840966084, "percentage": 56.53, "elapsed_time": "0:31:40", "remaining_time": "0:24:21", "throughput": 5546.4, "total_tokens": 10542512} +{"current_steps": 21420, "total_steps": 37885, "loss": 0.0401, "lr": 9.464004359762445e-07, "epoch": 2.8269763758743567, "percentage": 56.54, "elapsed_time": "0:31:41", "remaining_time": "0:24:21", "throughput": 5546.77, "total_tokens": 10545136} +{"current_steps": 21425, "total_steps": 37885, "loss": 0.0018, "lr": 9.459404058045023e-07, "epoch": 2.827636267652105, "percentage": 56.55, "elapsed_time": "0:31:41", "remaining_time": "0:24:20", "throughput": 5547.16, "total_tokens": 10547760} +{"current_steps": 21430, "total_steps": 37885, "loss": 0.0399, "lr": 9.454803871065176e-07, "epoch": 2.8282961594298532, "percentage": 56.57, "elapsed_time": "0:31:41", "remaining_time": "0:24:20", "throughput": 5547.34, "total_tokens": 10549936} +{"current_steps": 21435, "total_steps": 37885, "loss": 0.0011, "lr": 9.450203799799258e-07, "epoch": 2.828956051207602, "percentage": 56.58, "elapsed_time": "0:31:42", "remaining_time": "0:24:19", "throughput": 5547.53, "total_tokens": 10552176} +{"current_steps": 21440, "total_steps": 37885, "loss": 0.0415, "lr": 9.445603845223603e-07, "epoch": 2.8296159429853502, "percentage": 56.59, "elapsed_time": "0:31:42", "remaining_time": "0:24:19", "throughput": 5547.89, "total_tokens": 10554736} +{"current_steps": 21445, "total_steps": 37885, "loss": 0.1261, "lr": 9.44100400831452e-07, "epoch": 2.830275834763099, "percentage": 56.61, "elapsed_time": "0:31:42", "remaining_time": "0:24:18", "throughput": 5548.25, "total_tokens": 10557296} +{"current_steps": 21450, "total_steps": 37885, "loss": 0.0783, "lr": 9.436404290048282e-07, "epoch": 2.8309357265408472, "percentage": 56.62, "elapsed_time": "0:31:43", "remaining_time": "0:24:18", "throughput": 5548.66, "total_tokens": 10559984} +{"current_steps": 21455, "total_steps": 37885, "loss": 0.1771, "lr": 9.43180469140116e-07, "epoch": 2.8315956183185955, "percentage": 56.63, "elapsed_time": "0:31:43", "remaining_time": "0:24:17", "throughput": 5548.95, "total_tokens": 10562416} +{"current_steps": 21460, "total_steps": 37885, "loss": 0.0008, "lr": 9.427205213349369e-07, "epoch": 2.8322555100963442, "percentage": 56.65, "elapsed_time": "0:31:43", "remaining_time": "0:24:17", "throughput": 5549.31, "total_tokens": 10564976} +{"current_steps": 21465, "total_steps": 37885, "loss": 0.0006, "lr": 9.422605856869129e-07, "epoch": 2.8329154018740925, "percentage": 56.66, "elapsed_time": "0:31:44", "remaining_time": "0:24:16", "throughput": 5549.76, "total_tokens": 10567728} +{"current_steps": 21470, "total_steps": 37885, "loss": 0.0002, "lr": 9.418006622936618e-07, "epoch": 2.8335752936518412, "percentage": 56.67, "elapsed_time": "0:31:44", "remaining_time": "0:24:16", "throughput": 5550.19, "total_tokens": 10570416} +{"current_steps": 21475, "total_steps": 37885, "loss": 0.1179, "lr": 9.413407512527977e-07, "epoch": 2.8342351854295895, "percentage": 56.68, "elapsed_time": "0:31:44", "remaining_time": "0:24:15", "throughput": 5550.46, "total_tokens": 10572784} +{"current_steps": 21480, "total_steps": 37885, "loss": 0.0001, "lr": 9.408808526619352e-07, "epoch": 2.834895077207338, "percentage": 56.7, "elapsed_time": "0:31:45", "remaining_time": "0:24:15", "throughput": 5550.75, "total_tokens": 10575152} +{"current_steps": 21485, "total_steps": 37885, "loss": 0.0002, "lr": 9.404209666186831e-07, "epoch": 2.8355549689850865, "percentage": 56.71, "elapsed_time": "0:31:45", "remaining_time": "0:24:14", "throughput": 5551.09, "total_tokens": 10577648} +{"current_steps": 21490, "total_steps": 37885, "loss": 0.0001, "lr": 9.3996109322065e-07, "epoch": 2.836214860762835, "percentage": 56.72, "elapsed_time": "0:31:45", "remaining_time": "0:24:13", "throughput": 5551.47, "total_tokens": 10580208} +{"current_steps": 21495, "total_steps": 37885, "loss": 0.0355, "lr": 9.395012325654398e-07, "epoch": 2.8368747525405835, "percentage": 56.74, "elapsed_time": "0:31:46", "remaining_time": "0:24:13", "throughput": 5551.72, "total_tokens": 10582512} +{"current_steps": 21500, "total_steps": 37885, "loss": 0.0001, "lr": 9.390413847506547e-07, "epoch": 2.837534644318332, "percentage": 56.75, "elapsed_time": "0:31:46", "remaining_time": "0:24:12", "throughput": 5552.03, "total_tokens": 10584944} +{"current_steps": 21505, "total_steps": 37885, "loss": 0.049, "lr": 9.385815498738944e-07, "epoch": 2.83819453609608, "percentage": 56.76, "elapsed_time": "0:31:46", "remaining_time": "0:24:12", "throughput": 5552.28, "total_tokens": 10587248} +{"current_steps": 21510, "total_steps": 37885, "loss": 0.0809, "lr": 9.381217280327552e-07, "epoch": 2.838854427873829, "percentage": 56.78, "elapsed_time": "0:31:47", "remaining_time": "0:24:11", "throughput": 5552.75, "total_tokens": 10590000} +{"current_steps": 21515, "total_steps": 37885, "loss": 0.0, "lr": 9.376619193248314e-07, "epoch": 2.839514319651577, "percentage": 56.79, "elapsed_time": "0:31:47", "remaining_time": "0:24:11", "throughput": 5553.1, "total_tokens": 10592496} +{"current_steps": 21520, "total_steps": 37885, "loss": 0.0767, "lr": 9.372021238477138e-07, "epoch": 2.840174211429326, "percentage": 56.8, "elapsed_time": "0:31:47", "remaining_time": "0:24:10", "throughput": 5553.54, "total_tokens": 10595184} +{"current_steps": 21525, "total_steps": 37885, "loss": 0.001, "lr": 9.367423416989905e-07, "epoch": 2.840834103207074, "percentage": 56.82, "elapsed_time": "0:31:48", "remaining_time": "0:24:10", "throughput": 5553.82, "total_tokens": 10597552} +{"current_steps": 21530, "total_steps": 37885, "loss": 0.1518, "lr": 9.362825729762472e-07, "epoch": 2.8414939949848224, "percentage": 56.83, "elapsed_time": "0:31:48", "remaining_time": "0:24:09", "throughput": 5554.26, "total_tokens": 10600240} +{"current_steps": 21535, "total_steps": 37885, "loss": 0.1066, "lr": 9.358228177770663e-07, "epoch": 2.842153886762571, "percentage": 56.84, "elapsed_time": "0:31:48", "remaining_time": "0:24:09", "throughput": 5554.55, "total_tokens": 10602608} +{"current_steps": 21540, "total_steps": 37885, "loss": 0.0717, "lr": 9.353630761990276e-07, "epoch": 2.8428137785403194, "percentage": 56.86, "elapsed_time": "0:31:49", "remaining_time": "0:24:08", "throughput": 5554.9, "total_tokens": 10605104} +{"current_steps": 21545, "total_steps": 37885, "loss": 0.0023, "lr": 9.349033483397082e-07, "epoch": 2.843473670318068, "percentage": 56.87, "elapsed_time": "0:31:49", "remaining_time": "0:24:08", "throughput": 5555.25, "total_tokens": 10607600} +{"current_steps": 21550, "total_steps": 37885, "loss": 0.0004, "lr": 9.344436342966812e-07, "epoch": 2.8441335620958164, "percentage": 56.88, "elapsed_time": "0:31:49", "remaining_time": "0:24:07", "throughput": 5555.63, "total_tokens": 10610160} +{"current_steps": 21555, "total_steps": 37885, "loss": 0.0421, "lr": 9.339839341675185e-07, "epoch": 2.8447934538735646, "percentage": 56.9, "elapsed_time": "0:31:50", "remaining_time": "0:24:07", "throughput": 5555.85, "total_tokens": 10612400} +{"current_steps": 21560, "total_steps": 37885, "loss": 0.0003, "lr": 9.335242480497876e-07, "epoch": 2.845453345651313, "percentage": 56.91, "elapsed_time": "0:31:50", "remaining_time": "0:24:06", "throughput": 5556.3, "total_tokens": 10615088} +{"current_steps": 21565, "total_steps": 37885, "loss": 0.0002, "lr": 9.330645760410537e-07, "epoch": 2.8461132374290616, "percentage": 56.92, "elapsed_time": "0:31:50", "remaining_time": "0:24:06", "throughput": 5556.68, "total_tokens": 10617648} +{"current_steps": 21570, "total_steps": 37885, "loss": 0.0006, "lr": 9.326049182388789e-07, "epoch": 2.8467731292068104, "percentage": 56.94, "elapsed_time": "0:31:51", "remaining_time": "0:24:05", "throughput": 5557.12, "total_tokens": 10620336} +{"current_steps": 21575, "total_steps": 37885, "loss": 0.0001, "lr": 9.32145274740822e-07, "epoch": 2.8474330209845586, "percentage": 56.95, "elapsed_time": "0:31:51", "remaining_time": "0:24:04", "throughput": 5557.42, "total_tokens": 10622704} +{"current_steps": 21580, "total_steps": 37885, "loss": 0.0407, "lr": 9.316856456444392e-07, "epoch": 2.848092912762307, "percentage": 56.96, "elapsed_time": "0:31:51", "remaining_time": "0:24:04", "throughput": 5557.78, "total_tokens": 10625264} +{"current_steps": 21585, "total_steps": 37885, "loss": 0.0736, "lr": 9.312260310472833e-07, "epoch": 2.848752804540055, "percentage": 56.98, "elapsed_time": "0:31:52", "remaining_time": "0:24:03", "throughput": 5558.25, "total_tokens": 10628016} +{"current_steps": 21590, "total_steps": 37885, "loss": 0.0009, "lr": 9.307664310469046e-07, "epoch": 2.849412696317804, "percentage": 56.99, "elapsed_time": "0:31:52", "remaining_time": "0:24:03", "throughput": 5558.53, "total_tokens": 10630384} +{"current_steps": 21595, "total_steps": 37885, "loss": 0.0016, "lr": 9.303068457408497e-07, "epoch": 2.850072588095552, "percentage": 57.0, "elapsed_time": "0:31:52", "remaining_time": "0:24:02", "throughput": 5558.77, "total_tokens": 10632688} +{"current_steps": 21600, "total_steps": 37885, "loss": 0.0518, "lr": 9.298472752266615e-07, "epoch": 2.850732479873301, "percentage": 57.01, "elapsed_time": "0:31:53", "remaining_time": "0:24:02", "throughput": 5558.9, "total_tokens": 10634800} +{"current_steps": 21605, "total_steps": 37885, "loss": 0.0001, "lr": 9.293877196018816e-07, "epoch": 2.851392371651049, "percentage": 57.03, "elapsed_time": "0:31:53", "remaining_time": "0:24:01", "throughput": 5559.14, "total_tokens": 10637104} +{"current_steps": 21610, "total_steps": 37885, "loss": 0.0002, "lr": 9.289281789640465e-07, "epoch": 2.8520522634287975, "percentage": 57.04, "elapsed_time": "0:31:53", "remaining_time": "0:24:01", "throughput": 5559.36, "total_tokens": 10639408} +{"current_steps": 21615, "total_steps": 37885, "loss": 0.274, "lr": 9.28468653410691e-07, "epoch": 2.852712155206546, "percentage": 57.05, "elapsed_time": "0:31:54", "remaining_time": "0:24:00", "throughput": 5559.51, "total_tokens": 10641584} +{"current_steps": 21620, "total_steps": 37885, "loss": 0.0001, "lr": 9.280091430393462e-07, "epoch": 2.8533720469842945, "percentage": 57.07, "elapsed_time": "0:31:54", "remaining_time": "0:24:00", "throughput": 5559.73, "total_tokens": 10643888} +{"current_steps": 21625, "total_steps": 37885, "loss": 0.0001, "lr": 9.275496479475386e-07, "epoch": 2.854031938762043, "percentage": 57.08, "elapsed_time": "0:31:54", "remaining_time": "0:23:59", "throughput": 5560.11, "total_tokens": 10646512} +{"current_steps": 21630, "total_steps": 37885, "loss": 0.001, "lr": 9.270901682327945e-07, "epoch": 2.8546918305397915, "percentage": 57.09, "elapsed_time": "0:31:55", "remaining_time": "0:23:59", "throughput": 5560.37, "total_tokens": 10648880} +{"current_steps": 21635, "total_steps": 37885, "loss": 0.0012, "lr": 9.266307039926333e-07, "epoch": 2.8553517223175398, "percentage": 57.11, "elapsed_time": "0:31:55", "remaining_time": "0:23:58", "throughput": 5560.71, "total_tokens": 10651440} +{"current_steps": 21640, "total_steps": 37885, "loss": 0.0001, "lr": 9.261712553245747e-07, "epoch": 2.8560116140952885, "percentage": 57.12, "elapsed_time": "0:31:55", "remaining_time": "0:23:58", "throughput": 5561.06, "total_tokens": 10654000} +{"current_steps": 21645, "total_steps": 37885, "loss": 0.202, "lr": 9.257118223261323e-07, "epoch": 2.8566715058730368, "percentage": 57.13, "elapsed_time": "0:31:56", "remaining_time": "0:23:57", "throughput": 5561.39, "total_tokens": 10656560} +{"current_steps": 21650, "total_steps": 37885, "loss": 0.0427, "lr": 9.252524050948174e-07, "epoch": 2.8573313976507855, "percentage": 57.15, "elapsed_time": "0:31:56", "remaining_time": "0:23:57", "throughput": 5561.63, "total_tokens": 10658928} +{"current_steps": 21655, "total_steps": 37885, "loss": 0.0675, "lr": 9.247930037281385e-07, "epoch": 2.8579912894285338, "percentage": 57.16, "elapsed_time": "0:31:56", "remaining_time": "0:23:56", "throughput": 5561.92, "total_tokens": 10661360} +{"current_steps": 21660, "total_steps": 37885, "loss": 0.0338, "lr": 9.243336183235995e-07, "epoch": 2.858651181206282, "percentage": 57.17, "elapsed_time": "0:31:57", "remaining_time": "0:23:56", "throughput": 5562.27, "total_tokens": 10663920} +{"current_steps": 21665, "total_steps": 37885, "loss": 0.1112, "lr": 9.238742489787027e-07, "epoch": 2.8593110729840308, "percentage": 57.19, "elapsed_time": "0:31:57", "remaining_time": "0:23:55", "throughput": 5562.58, "total_tokens": 10666416} +{"current_steps": 21670, "total_steps": 37885, "loss": 0.0335, "lr": 9.234148957909451e-07, "epoch": 2.859970964761779, "percentage": 57.2, "elapsed_time": "0:31:57", "remaining_time": "0:23:55", "throughput": 5562.77, "total_tokens": 10668656} +{"current_steps": 21675, "total_steps": 37885, "loss": 0.1067, "lr": 9.229555588578211e-07, "epoch": 2.8606308565395278, "percentage": 57.21, "elapsed_time": "0:31:58", "remaining_time": "0:23:54", "throughput": 5563.09, "total_tokens": 10671152} +{"current_steps": 21680, "total_steps": 37885, "loss": 0.0001, "lr": 9.22496238276822e-07, "epoch": 2.861290748317276, "percentage": 57.23, "elapsed_time": "0:31:58", "remaining_time": "0:23:54", "throughput": 5563.28, "total_tokens": 10673392} +{"current_steps": 21685, "total_steps": 37885, "loss": 0.0005, "lr": 9.220369341454348e-07, "epoch": 2.8619506400950243, "percentage": 57.24, "elapsed_time": "0:31:58", "remaining_time": "0:23:53", "throughput": 5563.5, "total_tokens": 10675696} +{"current_steps": 21690, "total_steps": 37885, "loss": 0.0005, "lr": 9.215776465611441e-07, "epoch": 2.8626105318727726, "percentage": 57.25, "elapsed_time": "0:31:59", "remaining_time": "0:23:52", "throughput": 5563.69, "total_tokens": 10677936} +{"current_steps": 21695, "total_steps": 37885, "loss": 0.0022, "lr": 9.2111837562143e-07, "epoch": 2.8632704236505213, "percentage": 57.27, "elapsed_time": "0:31:59", "remaining_time": "0:23:52", "throughput": 5564.23, "total_tokens": 10680880} +{"current_steps": 21700, "total_steps": 37885, "loss": 0.0003, "lr": 9.206591214237692e-07, "epoch": 2.86393031542827, "percentage": 57.28, "elapsed_time": "0:31:59", "remaining_time": "0:23:51", "throughput": 5564.39, "total_tokens": 10683056} +{"current_steps": 21705, "total_steps": 37885, "loss": 0.1564, "lr": 9.201998840656355e-07, "epoch": 2.8645902072060183, "percentage": 57.29, "elapsed_time": "0:32:00", "remaining_time": "0:23:51", "throughput": 5564.69, "total_tokens": 10685552} +{"current_steps": 21710, "total_steps": 37885, "loss": 0.0002, "lr": 9.197406636444984e-07, "epoch": 2.8652500989837666, "percentage": 57.31, "elapsed_time": "0:32:00", "remaining_time": "0:23:50", "throughput": 5564.84, "total_tokens": 10687728} +{"current_steps": 21715, "total_steps": 37885, "loss": 0.0323, "lr": 9.192814602578245e-07, "epoch": 2.865909990761515, "percentage": 57.32, "elapsed_time": "0:32:00", "remaining_time": "0:23:50", "throughput": 5565.24, "total_tokens": 10690352} +{"current_steps": 21720, "total_steps": 37885, "loss": 0.1149, "lr": 9.188222740030759e-07, "epoch": 2.8665698825392636, "percentage": 57.33, "elapsed_time": "0:32:01", "remaining_time": "0:23:49", "throughput": 5565.73, "total_tokens": 10693168} +{"current_steps": 21725, "total_steps": 37885, "loss": 0.0007, "lr": 9.18363104977712e-07, "epoch": 2.867229774317012, "percentage": 57.34, "elapsed_time": "0:32:01", "remaining_time": "0:23:49", "throughput": 5566.04, "total_tokens": 10695600} +{"current_steps": 21730, "total_steps": 37885, "loss": 0.0816, "lr": 9.179039532791879e-07, "epoch": 2.8678896660947606, "percentage": 57.36, "elapsed_time": "0:32:01", "remaining_time": "0:23:48", "throughput": 5566.35, "total_tokens": 10698032} +{"current_steps": 21735, "total_steps": 37885, "loss": 0.0805, "lr": 9.174448190049551e-07, "epoch": 2.868549557872509, "percentage": 57.37, "elapsed_time": "0:32:02", "remaining_time": "0:23:48", "throughput": 5566.57, "total_tokens": 10700272} +{"current_steps": 21740, "total_steps": 37885, "loss": 0.0385, "lr": 9.169857022524616e-07, "epoch": 2.869209449650257, "percentage": 57.38, "elapsed_time": "0:32:02", "remaining_time": "0:23:47", "throughput": 5566.84, "total_tokens": 10702640} +{"current_steps": 21745, "total_steps": 37885, "loss": 0.1961, "lr": 9.165266031191518e-07, "epoch": 2.869869341428006, "percentage": 57.4, "elapsed_time": "0:32:02", "remaining_time": "0:23:47", "throughput": 5567.17, "total_tokens": 10705136} +{"current_steps": 21750, "total_steps": 37885, "loss": 0.0003, "lr": 9.160675217024659e-07, "epoch": 2.870529233205754, "percentage": 57.41, "elapsed_time": "0:32:03", "remaining_time": "0:23:46", "throughput": 5567.61, "total_tokens": 10707824} +{"current_steps": 21755, "total_steps": 37885, "loss": 0.1185, "lr": 9.156084580998409e-07, "epoch": 2.871189124983503, "percentage": 57.42, "elapsed_time": "0:32:03", "remaining_time": "0:23:46", "throughput": 5567.83, "total_tokens": 10710064} +{"current_steps": 21760, "total_steps": 37885, "loss": 0.0926, "lr": 9.151494124087093e-07, "epoch": 2.871849016761251, "percentage": 57.44, "elapsed_time": "0:32:03", "remaining_time": "0:23:45", "throughput": 5568.1, "total_tokens": 10712432} +{"current_steps": 21765, "total_steps": 37885, "loss": 0.0872, "lr": 9.146903847265008e-07, "epoch": 2.8725089085389994, "percentage": 57.45, "elapsed_time": "0:32:04", "remaining_time": "0:23:45", "throughput": 5568.31, "total_tokens": 10714672} +{"current_steps": 21770, "total_steps": 37885, "loss": 0.0009, "lr": 9.142313751506401e-07, "epoch": 2.873168800316748, "percentage": 57.46, "elapsed_time": "0:32:04", "remaining_time": "0:23:44", "throughput": 5568.53, "total_tokens": 10716912} +{"current_steps": 21775, "total_steps": 37885, "loss": 0.0009, "lr": 9.137723837785491e-07, "epoch": 2.8738286920944964, "percentage": 57.48, "elapsed_time": "0:32:04", "remaining_time": "0:23:44", "throughput": 5568.96, "total_tokens": 10719600} +{"current_steps": 21780, "total_steps": 37885, "loss": 0.0015, "lr": 9.133134107076455e-07, "epoch": 2.874488583872245, "percentage": 57.49, "elapsed_time": "0:32:05", "remaining_time": "0:23:43", "throughput": 5569.2, "total_tokens": 10721904} +{"current_steps": 21785, "total_steps": 37885, "loss": 0.0006, "lr": 9.12854456035342e-07, "epoch": 2.8751484756499934, "percentage": 57.5, "elapsed_time": "0:32:05", "remaining_time": "0:23:43", "throughput": 5569.7, "total_tokens": 10724720} +{"current_steps": 21790, "total_steps": 37885, "loss": 0.0011, "lr": 9.123955198590498e-07, "epoch": 2.8758083674277417, "percentage": 57.52, "elapsed_time": "0:32:05", "remaining_time": "0:23:42", "throughput": 5570.05, "total_tokens": 10727216} +{"current_steps": 21795, "total_steps": 37885, "loss": 0.0004, "lr": 9.119366022761736e-07, "epoch": 2.8764682592054904, "percentage": 57.53, "elapsed_time": "0:32:06", "remaining_time": "0:23:42", "throughput": 5570.37, "total_tokens": 10729648} +{"current_steps": 21800, "total_steps": 37885, "loss": 0.0005, "lr": 9.114777033841162e-07, "epoch": 2.8771281509832387, "percentage": 57.54, "elapsed_time": "0:32:06", "remaining_time": "0:23:41", "throughput": 5570.64, "total_tokens": 10732016} +{"current_steps": 21805, "total_steps": 37885, "loss": 0.058, "lr": 9.110188232802756e-07, "epoch": 2.8777880427609874, "percentage": 57.56, "elapsed_time": "0:32:06", "remaining_time": "0:23:40", "throughput": 5570.89, "total_tokens": 10734320} +{"current_steps": 21810, "total_steps": 37885, "loss": 0.0001, "lr": 9.105599620620446e-07, "epoch": 2.8784479345387357, "percentage": 57.57, "elapsed_time": "0:32:07", "remaining_time": "0:23:40", "throughput": 5571.32, "total_tokens": 10737008} +{"current_steps": 21815, "total_steps": 37885, "loss": 0.0938, "lr": 9.101011198268146e-07, "epoch": 2.879107826316484, "percentage": 57.58, "elapsed_time": "0:32:07", "remaining_time": "0:23:39", "throughput": 5571.72, "total_tokens": 10739632} +{"current_steps": 21820, "total_steps": 37885, "loss": 0.0004, "lr": 9.096422966719704e-07, "epoch": 2.8797677180942327, "percentage": 57.6, "elapsed_time": "0:32:07", "remaining_time": "0:23:39", "throughput": 5572.18, "total_tokens": 10742384} +{"current_steps": 21825, "total_steps": 37885, "loss": 0.1499, "lr": 9.091834926948949e-07, "epoch": 2.880427609871981, "percentage": 57.61, "elapsed_time": "0:32:08", "remaining_time": "0:23:38", "throughput": 5572.52, "total_tokens": 10744880} +{"current_steps": 21830, "total_steps": 37885, "loss": 0.0004, "lr": 9.087247079929654e-07, "epoch": 2.8810875016497297, "percentage": 57.62, "elapsed_time": "0:32:08", "remaining_time": "0:23:38", "throughput": 5572.97, "total_tokens": 10747632} +{"current_steps": 21835, "total_steps": 37885, "loss": 0.0001, "lr": 9.082659426635554e-07, "epoch": 2.881747393427478, "percentage": 57.63, "elapsed_time": "0:32:08", "remaining_time": "0:23:37", "throughput": 5573.31, "total_tokens": 10750128} +{"current_steps": 21840, "total_steps": 37885, "loss": 0.1334, "lr": 9.07807196804035e-07, "epoch": 2.8824072852052263, "percentage": 57.65, "elapsed_time": "0:32:09", "remaining_time": "0:23:37", "throughput": 5573.77, "total_tokens": 10752880} +{"current_steps": 21845, "total_steps": 37885, "loss": 0.1001, "lr": 9.073484705117691e-07, "epoch": 2.8830671769829745, "percentage": 57.66, "elapsed_time": "0:32:09", "remaining_time": "0:23:36", "throughput": 5574.17, "total_tokens": 10755504} +{"current_steps": 21850, "total_steps": 37885, "loss": 0.0001, "lr": 9.068897638841197e-07, "epoch": 2.8837270687607233, "percentage": 57.67, "elapsed_time": "0:32:09", "remaining_time": "0:23:36", "throughput": 5574.43, "total_tokens": 10757808} +{"current_steps": 21855, "total_steps": 37885, "loss": 0.0008, "lr": 9.064310770184438e-07, "epoch": 2.8843869605384715, "percentage": 57.69, "elapsed_time": "0:32:10", "remaining_time": "0:23:35", "throughput": 5574.83, "total_tokens": 10760432} +{"current_steps": 21860, "total_steps": 37885, "loss": 0.0008, "lr": 9.059724100120939e-07, "epoch": 2.8850468523162203, "percentage": 57.7, "elapsed_time": "0:32:10", "remaining_time": "0:23:35", "throughput": 5575.13, "total_tokens": 10762864} +{"current_steps": 21865, "total_steps": 37885, "loss": 0.0027, "lr": 9.055137629624194e-07, "epoch": 2.8857067440939685, "percentage": 57.71, "elapsed_time": "0:32:10", "remaining_time": "0:23:34", "throughput": 5575.42, "total_tokens": 10765232} +{"current_steps": 21870, "total_steps": 37885, "loss": 0.0253, "lr": 9.05055135966764e-07, "epoch": 2.886366635871717, "percentage": 57.73, "elapsed_time": "0:32:11", "remaining_time": "0:23:34", "throughput": 5575.7, "total_tokens": 10767600} +{"current_steps": 21875, "total_steps": 37885, "loss": 0.0001, "lr": 9.04596529122469e-07, "epoch": 2.8870265276494655, "percentage": 57.74, "elapsed_time": "0:32:11", "remaining_time": "0:23:33", "throughput": 5575.85, "total_tokens": 10769712} +{"current_steps": 21880, "total_steps": 37885, "loss": 0.0, "lr": 9.041379425268697e-07, "epoch": 2.887686419427214, "percentage": 57.75, "elapsed_time": "0:32:11", "remaining_time": "0:23:33", "throughput": 5576.07, "total_tokens": 10771952} +{"current_steps": 21885, "total_steps": 37885, "loss": 0.0676, "lr": 9.036793762772977e-07, "epoch": 2.8883463112049625, "percentage": 57.77, "elapsed_time": "0:32:12", "remaining_time": "0:23:32", "throughput": 5576.44, "total_tokens": 10774512} +{"current_steps": 21890, "total_steps": 37885, "loss": 0.0001, "lr": 9.032208304710808e-07, "epoch": 2.889006202982711, "percentage": 57.78, "elapsed_time": "0:32:12", "remaining_time": "0:23:32", "throughput": 5576.76, "total_tokens": 10776944} +{"current_steps": 21895, "total_steps": 37885, "loss": 0.0007, "lr": 9.027623052055417e-07, "epoch": 2.889666094760459, "percentage": 57.79, "elapsed_time": "0:32:12", "remaining_time": "0:23:31", "throughput": 5577.16, "total_tokens": 10779568} +{"current_steps": 21900, "total_steps": 37885, "loss": 0.0041, "lr": 9.023038005779992e-07, "epoch": 2.890325986538208, "percentage": 57.81, "elapsed_time": "0:32:13", "remaining_time": "0:23:31", "throughput": 5577.71, "total_tokens": 10782512} +{"current_steps": 21905, "total_steps": 37885, "loss": 0.0001, "lr": 9.018453166857677e-07, "epoch": 2.890985878315956, "percentage": 57.82, "elapsed_time": "0:32:13", "remaining_time": "0:23:30", "throughput": 5577.95, "total_tokens": 10784816} +{"current_steps": 21910, "total_steps": 37885, "loss": 0.0001, "lr": 9.013868536261566e-07, "epoch": 2.891645770093705, "percentage": 57.83, "elapsed_time": "0:32:13", "remaining_time": "0:23:29", "throughput": 5578.39, "total_tokens": 10787504} +{"current_steps": 21915, "total_steps": 37885, "loss": 0.0, "lr": 9.009284114964721e-07, "epoch": 2.892305661871453, "percentage": 57.85, "elapsed_time": "0:32:14", "remaining_time": "0:23:29", "throughput": 5578.7, "total_tokens": 10789936} +{"current_steps": 21920, "total_steps": 37885, "loss": 0.0004, "lr": 9.004699903940146e-07, "epoch": 2.8929655536492014, "percentage": 57.86, "elapsed_time": "0:32:14", "remaining_time": "0:23:28", "throughput": 5579.07, "total_tokens": 10792496} +{"current_steps": 21925, "total_steps": 37885, "loss": 0.1505, "lr": 9.000115904160811e-07, "epoch": 2.89362544542695, "percentage": 57.87, "elapsed_time": "0:32:14", "remaining_time": "0:23:28", "throughput": 5579.34, "total_tokens": 10794864} +{"current_steps": 21930, "total_steps": 37885, "loss": 0.0036, "lr": 8.995532116599636e-07, "epoch": 2.8942853372046984, "percentage": 57.89, "elapsed_time": "0:32:15", "remaining_time": "0:23:27", "throughput": 5579.62, "total_tokens": 10797232} +{"current_steps": 21935, "total_steps": 37885, "loss": 0.0001, "lr": 8.99094854222949e-07, "epoch": 2.894945228982447, "percentage": 57.9, "elapsed_time": "0:32:15", "remaining_time": "0:23:27", "throughput": 5579.96, "total_tokens": 10799728} +{"current_steps": 21940, "total_steps": 37885, "loss": 0.0537, "lr": 8.986365182023212e-07, "epoch": 2.8956051207601954, "percentage": 57.91, "elapsed_time": "0:32:15", "remaining_time": "0:23:26", "throughput": 5580.3, "total_tokens": 10802224} +{"current_steps": 21945, "total_steps": 37885, "loss": 0.0036, "lr": 8.981782036953583e-07, "epoch": 2.8962650125379437, "percentage": 57.93, "elapsed_time": "0:32:16", "remaining_time": "0:23:26", "throughput": 5580.58, "total_tokens": 10804592} +{"current_steps": 21950, "total_steps": 37885, "loss": 0.0397, "lr": 8.977199107993345e-07, "epoch": 2.8969249043156924, "percentage": 57.94, "elapsed_time": "0:32:16", "remaining_time": "0:23:25", "throughput": 5580.81, "total_tokens": 10806896} +{"current_steps": 21955, "total_steps": 37885, "loss": 0.0016, "lr": 8.972616396115194e-07, "epoch": 2.8975847960934407, "percentage": 57.95, "elapsed_time": "0:32:16", "remaining_time": "0:23:25", "throughput": 5581.12, "total_tokens": 10809328} +{"current_steps": 21960, "total_steps": 37885, "loss": 0.0631, "lr": 8.968033902291764e-07, "epoch": 2.8982446878711894, "percentage": 57.96, "elapsed_time": "0:32:17", "remaining_time": "0:23:24", "throughput": 5581.51, "total_tokens": 10811952} +{"current_steps": 21965, "total_steps": 37885, "loss": 0.0689, "lr": 8.963451627495673e-07, "epoch": 2.8989045796489377, "percentage": 57.98, "elapsed_time": "0:32:17", "remaining_time": "0:23:24", "throughput": 5581.76, "total_tokens": 10814256} +{"current_steps": 21970, "total_steps": 37885, "loss": 0.0617, "lr": 8.95886957269946e-07, "epoch": 2.899564471426686, "percentage": 57.99, "elapsed_time": "0:32:17", "remaining_time": "0:23:23", "throughput": 5582.03, "total_tokens": 10816624} +{"current_steps": 21975, "total_steps": 37885, "loss": 0.0001, "lr": 8.954287738875649e-07, "epoch": 2.900224363204434, "percentage": 58.0, "elapsed_time": "0:32:18", "remaining_time": "0:23:23", "throughput": 5582.4, "total_tokens": 10819184} +{"current_steps": 21980, "total_steps": 37885, "loss": 0.0006, "lr": 8.94970612699669e-07, "epoch": 2.900884254982183, "percentage": 58.02, "elapsed_time": "0:32:18", "remaining_time": "0:23:22", "throughput": 5582.71, "total_tokens": 10821616} +{"current_steps": 21985, "total_steps": 37885, "loss": 0.0308, "lr": 8.945124738034998e-07, "epoch": 2.901544146759931, "percentage": 58.03, "elapsed_time": "0:32:18", "remaining_time": "0:23:22", "throughput": 5582.96, "total_tokens": 10823920} +{"current_steps": 21990, "total_steps": 37885, "loss": 0.0003, "lr": 8.940543572962944e-07, "epoch": 2.90220403853768, "percentage": 58.04, "elapsed_time": "0:32:19", "remaining_time": "0:23:21", "throughput": 5583.25, "total_tokens": 10826288} +{"current_steps": 21995, "total_steps": 37885, "loss": 0.0, "lr": 8.93596263275284e-07, "epoch": 2.902863930315428, "percentage": 58.06, "elapsed_time": "0:32:19", "remaining_time": "0:23:21", "throughput": 5583.43, "total_tokens": 10828464} +{"current_steps": 22000, "total_steps": 37885, "loss": 0.1661, "lr": 8.931381918376969e-07, "epoch": 2.9035238220931765, "percentage": 58.07, "elapsed_time": "0:32:19", "remaining_time": "0:23:20", "throughput": 5583.78, "total_tokens": 10830960} +{"current_steps": 22005, "total_steps": 37885, "loss": 0.0767, "lr": 8.926801430807545e-07, "epoch": 2.904183713870925, "percentage": 58.08, "elapsed_time": "0:32:20", "remaining_time": "0:23:20", "throughput": 5583.96, "total_tokens": 10833136} +{"current_steps": 22010, "total_steps": 37885, "loss": 0.0001, "lr": 8.922221171016744e-07, "epoch": 2.9048436056486735, "percentage": 58.1, "elapsed_time": "0:32:20", "remaining_time": "0:23:19", "throughput": 5584.3, "total_tokens": 10835632} +{"current_steps": 22015, "total_steps": 37885, "loss": 0.0003, "lr": 8.917641139976697e-07, "epoch": 2.905503497426422, "percentage": 58.11, "elapsed_time": "0:32:20", "remaining_time": "0:23:18", "throughput": 5584.61, "total_tokens": 10838064} +{"current_steps": 22020, "total_steps": 37885, "loss": 0.0798, "lr": 8.913061338659478e-07, "epoch": 2.9061633892041705, "percentage": 58.12, "elapsed_time": "0:32:21", "remaining_time": "0:23:18", "throughput": 5584.85, "total_tokens": 10840368} +{"current_steps": 22025, "total_steps": 37885, "loss": 0.0282, "lr": 8.908481768037119e-07, "epoch": 2.9068232809819188, "percentage": 58.14, "elapsed_time": "0:32:21", "remaining_time": "0:23:17", "throughput": 5585.17, "total_tokens": 10842800} +{"current_steps": 22030, "total_steps": 37885, "loss": 0.0005, "lr": 8.903902429081603e-07, "epoch": 2.9074831727596675, "percentage": 58.15, "elapsed_time": "0:32:21", "remaining_time": "0:23:17", "throughput": 5585.56, "total_tokens": 10845424} +{"current_steps": 22035, "total_steps": 37885, "loss": 0.0004, "lr": 8.899323322764857e-07, "epoch": 2.908143064537416, "percentage": 58.16, "elapsed_time": "0:32:22", "remaining_time": "0:23:16", "throughput": 5585.9, "total_tokens": 10847920} +{"current_steps": 22040, "total_steps": 37885, "loss": 0.0001, "lr": 8.894744450058767e-07, "epoch": 2.9088029563151645, "percentage": 58.18, "elapsed_time": "0:32:22", "remaining_time": "0:23:16", "throughput": 5586.12, "total_tokens": 10850160} +{"current_steps": 22045, "total_steps": 37885, "loss": 0.0661, "lr": 8.890165811935161e-07, "epoch": 2.909462848092913, "percentage": 58.19, "elapsed_time": "0:32:22", "remaining_time": "0:23:15", "throughput": 5586.33, "total_tokens": 10852400} +{"current_steps": 22050, "total_steps": 37885, "loss": 0.1177, "lr": 8.885587409365826e-07, "epoch": 2.910122739870661, "percentage": 58.2, "elapsed_time": "0:32:23", "remaining_time": "0:23:15", "throughput": 5586.63, "total_tokens": 10854832} +{"current_steps": 22055, "total_steps": 37885, "loss": 0.0018, "lr": 8.881009243322493e-07, "epoch": 2.91078263164841, "percentage": 58.22, "elapsed_time": "0:32:23", "remaining_time": "0:23:14", "throughput": 5587.04, "total_tokens": 10857456} +{"current_steps": 22060, "total_steps": 37885, "loss": 0.0011, "lr": 8.876431314776847e-07, "epoch": 2.911442523426158, "percentage": 58.23, "elapsed_time": "0:32:23", "remaining_time": "0:23:14", "throughput": 5587.43, "total_tokens": 10860080} +{"current_steps": 22065, "total_steps": 37885, "loss": 0.0001, "lr": 8.871853624700517e-07, "epoch": 2.912102415203907, "percentage": 58.24, "elapsed_time": "0:32:23", "remaining_time": "0:23:13", "throughput": 5587.81, "total_tokens": 10862640} +{"current_steps": 22070, "total_steps": 37885, "loss": 0.0494, "lr": 8.867276174065085e-07, "epoch": 2.912762306981655, "percentage": 58.26, "elapsed_time": "0:32:24", "remaining_time": "0:23:13", "throughput": 5588.03, "total_tokens": 10864880} +{"current_steps": 22075, "total_steps": 37885, "loss": 0.0501, "lr": 8.862698963842084e-07, "epoch": 2.9134221987594033, "percentage": 58.27, "elapsed_time": "0:32:24", "remaining_time": "0:23:12", "throughput": 5588.21, "total_tokens": 10867056} +{"current_steps": 22080, "total_steps": 37885, "loss": 0.0003, "lr": 8.85812199500299e-07, "epoch": 2.914082090537152, "percentage": 58.28, "elapsed_time": "0:32:24", "remaining_time": "0:23:12", "throughput": 5588.49, "total_tokens": 10869424} +{"current_steps": 22085, "total_steps": 37885, "loss": 0.0876, "lr": 8.853545268519235e-07, "epoch": 2.9147419823149003, "percentage": 58.29, "elapsed_time": "0:32:25", "remaining_time": "0:23:11", "throughput": 5588.86, "total_tokens": 10871984} +{"current_steps": 22090, "total_steps": 37885, "loss": 0.0002, "lr": 8.848968785362196e-07, "epoch": 2.915401874092649, "percentage": 58.31, "elapsed_time": "0:32:25", "remaining_time": "0:23:11", "throughput": 5589.13, "total_tokens": 10874352} +{"current_steps": 22095, "total_steps": 37885, "loss": 0.0004, "lr": 8.844392546503195e-07, "epoch": 2.9160617658703973, "percentage": 58.32, "elapsed_time": "0:32:25", "remaining_time": "0:23:10", "throughput": 5589.35, "total_tokens": 10876592} +{"current_steps": 22100, "total_steps": 37885, "loss": 0.0537, "lr": 8.83981655291351e-07, "epoch": 2.9167216576481456, "percentage": 58.33, "elapsed_time": "0:32:26", "remaining_time": "0:23:10", "throughput": 5589.66, "total_tokens": 10879024} +{"current_steps": 22105, "total_steps": 37885, "loss": 0.0757, "lr": 8.835240805564358e-07, "epoch": 2.917381549425894, "percentage": 58.35, "elapsed_time": "0:32:26", "remaining_time": "0:23:09", "throughput": 5590.02, "total_tokens": 10881584} +{"current_steps": 22110, "total_steps": 37885, "loss": 0.0, "lr": 8.830665305426914e-07, "epoch": 2.9180414412036426, "percentage": 58.36, "elapsed_time": "0:32:26", "remaining_time": "0:23:09", "throughput": 5590.39, "total_tokens": 10884144} +{"current_steps": 22115, "total_steps": 37885, "loss": 0.1362, "lr": 8.826090053472291e-07, "epoch": 2.918701332981391, "percentage": 58.37, "elapsed_time": "0:32:27", "remaining_time": "0:23:08", "throughput": 5590.82, "total_tokens": 10886832} +{"current_steps": 22120, "total_steps": 37885, "loss": 0.0011, "lr": 8.821515050671547e-07, "epoch": 2.9193612247591396, "percentage": 58.39, "elapsed_time": "0:32:27", "remaining_time": "0:23:08", "throughput": 5590.94, "total_tokens": 10888880} +{"current_steps": 22125, "total_steps": 37885, "loss": 0.0005, "lr": 8.816940297995705e-07, "epoch": 2.920021116536888, "percentage": 58.4, "elapsed_time": "0:32:27", "remaining_time": "0:23:07", "throughput": 5591.22, "total_tokens": 10891248} +{"current_steps": 22130, "total_steps": 37885, "loss": 0.0001, "lr": 8.812365796415715e-07, "epoch": 2.920681008314636, "percentage": 58.41, "elapsed_time": "0:32:28", "remaining_time": "0:23:07", "throughput": 5591.48, "total_tokens": 10893552} +{"current_steps": 22135, "total_steps": 37885, "loss": 0.0004, "lr": 8.807791546902488e-07, "epoch": 2.921340900092385, "percentage": 58.43, "elapsed_time": "0:32:28", "remaining_time": "0:23:06", "throughput": 5591.76, "total_tokens": 10895920} +{"current_steps": 22140, "total_steps": 37885, "loss": 0.0001, "lr": 8.803217550426873e-07, "epoch": 2.922000791870133, "percentage": 58.44, "elapsed_time": "0:32:28", "remaining_time": "0:23:05", "throughput": 5592.17, "total_tokens": 10898608} +{"current_steps": 22145, "total_steps": 37885, "loss": 0.0001, "lr": 8.79864380795966e-07, "epoch": 2.922660683647882, "percentage": 58.45, "elapsed_time": "0:32:29", "remaining_time": "0:23:05", "throughput": 5592.48, "total_tokens": 10901040} +{"current_steps": 22150, "total_steps": 37885, "loss": 0.094, "lr": 8.794070320471605e-07, "epoch": 2.92332057542563, "percentage": 58.47, "elapsed_time": "0:32:29", "remaining_time": "0:23:04", "throughput": 5592.81, "total_tokens": 10903536} +{"current_steps": 22155, "total_steps": 37885, "loss": 0.1084, "lr": 8.789497088933386e-07, "epoch": 2.9239804672033785, "percentage": 58.48, "elapsed_time": "0:32:29", "remaining_time": "0:23:04", "throughput": 5593.11, "total_tokens": 10905968} +{"current_steps": 22160, "total_steps": 37885, "loss": 0.1881, "lr": 8.78492411431565e-07, "epoch": 2.924640358981127, "percentage": 58.49, "elapsed_time": "0:32:30", "remaining_time": "0:23:03", "throughput": 5593.54, "total_tokens": 10908656} +{"current_steps": 22165, "total_steps": 37885, "loss": 0.0003, "lr": 8.78035139758897e-07, "epoch": 2.9253002507588755, "percentage": 58.51, "elapsed_time": "0:32:30", "remaining_time": "0:23:03", "throughput": 5593.78, "total_tokens": 10910960} +{"current_steps": 22170, "total_steps": 37885, "loss": 0.0003, "lr": 8.775778939723874e-07, "epoch": 2.925960142536624, "percentage": 58.52, "elapsed_time": "0:32:30", "remaining_time": "0:23:02", "throughput": 5594.17, "total_tokens": 10913584} +{"current_steps": 22175, "total_steps": 37885, "loss": 0.0009, "lr": 8.771206741690832e-07, "epoch": 2.9266200343143725, "percentage": 58.53, "elapsed_time": "0:32:31", "remaining_time": "0:23:02", "throughput": 5594.57, "total_tokens": 10916208} +{"current_steps": 22180, "total_steps": 37885, "loss": 0.0003, "lr": 8.76663480446026e-07, "epoch": 2.9272799260921207, "percentage": 58.55, "elapsed_time": "0:32:31", "remaining_time": "0:23:01", "throughput": 5594.9, "total_tokens": 10918704} +{"current_steps": 22185, "total_steps": 37885, "loss": 0.0007, "lr": 8.762063129002521e-07, "epoch": 2.9279398178698695, "percentage": 58.56, "elapsed_time": "0:32:31", "remaining_time": "0:23:01", "throughput": 5595.23, "total_tokens": 10921200} +{"current_steps": 22190, "total_steps": 37885, "loss": 0.0567, "lr": 8.757491716287919e-07, "epoch": 2.9285997096476177, "percentage": 58.57, "elapsed_time": "0:32:32", "remaining_time": "0:23:00", "throughput": 5595.51, "total_tokens": 10923568} +{"current_steps": 22195, "total_steps": 37885, "loss": 0.0356, "lr": 8.752920567286701e-07, "epoch": 2.9292596014253665, "percentage": 58.59, "elapsed_time": "0:32:32", "remaining_time": "0:23:00", "throughput": 5595.76, "total_tokens": 10925872} +{"current_steps": 22200, "total_steps": 37885, "loss": 0.0273, "lr": 8.748349682969063e-07, "epoch": 2.9299194932031147, "percentage": 58.6, "elapsed_time": "0:32:32", "remaining_time": "0:22:59", "throughput": 5596.15, "total_tokens": 10928496} +{"current_steps": 22205, "total_steps": 37885, "loss": 0.0001, "lr": 8.743779064305139e-07, "epoch": 2.930579384980863, "percentage": 58.61, "elapsed_time": "0:32:33", "remaining_time": "0:22:59", "throughput": 5596.33, "total_tokens": 10930672} +{"current_steps": 22210, "total_steps": 37885, "loss": 0.0002, "lr": 8.739208712265015e-07, "epoch": 2.9312392767586117, "percentage": 58.62, "elapsed_time": "0:32:33", "remaining_time": "0:22:58", "throughput": 5596.66, "total_tokens": 10933168} +{"current_steps": 22215, "total_steps": 37885, "loss": 0.1551, "lr": 8.734638627818711e-07, "epoch": 2.93189916853636, "percentage": 58.64, "elapsed_time": "0:32:33", "remaining_time": "0:22:58", "throughput": 5596.9, "total_tokens": 10935472} +{"current_steps": 22220, "total_steps": 37885, "loss": 0.1055, "lr": 8.730068811936194e-07, "epoch": 2.9325590603141087, "percentage": 58.65, "elapsed_time": "0:32:34", "remaining_time": "0:22:57", "throughput": 5597.37, "total_tokens": 10938288} +{"current_steps": 22225, "total_steps": 37885, "loss": 0.052, "lr": 8.725499265587376e-07, "epoch": 2.933218952091857, "percentage": 58.66, "elapsed_time": "0:32:34", "remaining_time": "0:22:57", "throughput": 5597.61, "total_tokens": 10940592} +{"current_steps": 22230, "total_steps": 37885, "loss": 0.0675, "lr": 8.720929989742108e-07, "epoch": 2.9338788438696053, "percentage": 58.68, "elapsed_time": "0:32:34", "remaining_time": "0:22:56", "throughput": 5597.82, "total_tokens": 10942832} +{"current_steps": 22235, "total_steps": 37885, "loss": 0.0006, "lr": 8.71636098537019e-07, "epoch": 2.9345387356473536, "percentage": 58.69, "elapsed_time": "0:32:35", "remaining_time": "0:22:56", "throughput": 5598.21, "total_tokens": 10945456} +{"current_steps": 22240, "total_steps": 37885, "loss": 0.001, "lr": 8.711792253441358e-07, "epoch": 2.9351986274251023, "percentage": 58.7, "elapsed_time": "0:32:35", "remaining_time": "0:22:55", "throughput": 5598.54, "total_tokens": 10947952} +{"current_steps": 22245, "total_steps": 37885, "loss": 0.0002, "lr": 8.70722379492529e-07, "epoch": 2.9358585192028506, "percentage": 58.72, "elapsed_time": "0:32:35", "remaining_time": "0:22:55", "throughput": 5598.9, "total_tokens": 10950512} +{"current_steps": 22250, "total_steps": 37885, "loss": 0.0006, "lr": 8.70265561079161e-07, "epoch": 2.9365184109805993, "percentage": 58.73, "elapsed_time": "0:32:36", "remaining_time": "0:22:54", "throughput": 5599.29, "total_tokens": 10953136} +{"current_steps": 22255, "total_steps": 37885, "loss": 0.0665, "lr": 8.698087702009882e-07, "epoch": 2.9371783027583476, "percentage": 58.74, "elapsed_time": "0:32:36", "remaining_time": "0:22:54", "throughput": 5599.54, "total_tokens": 10955440} +{"current_steps": 22260, "total_steps": 37885, "loss": 0.0892, "lr": 8.693520069549612e-07, "epoch": 2.937838194536096, "percentage": 58.76, "elapsed_time": "0:32:36", "remaining_time": "0:22:53", "throughput": 5599.94, "total_tokens": 10958064} +{"current_steps": 22265, "total_steps": 37885, "loss": 0.0043, "lr": 8.688952714380247e-07, "epoch": 2.9384980863138446, "percentage": 58.77, "elapsed_time": "0:32:37", "remaining_time": "0:22:53", "throughput": 5600.33, "total_tokens": 10960688} +{"current_steps": 22270, "total_steps": 37885, "loss": 0.0382, "lr": 8.684385637471173e-07, "epoch": 2.939157978091593, "percentage": 58.78, "elapsed_time": "0:32:37", "remaining_time": "0:22:52", "throughput": 5600.64, "total_tokens": 10963120} +{"current_steps": 22275, "total_steps": 37885, "loss": 0.3035, "lr": 8.679818839791721e-07, "epoch": 2.9398178698693416, "percentage": 58.8, "elapsed_time": "0:32:37", "remaining_time": "0:22:52", "throughput": 5600.96, "total_tokens": 10965616} +{"current_steps": 22280, "total_steps": 37885, "loss": 0.0004, "lr": 8.675252322311161e-07, "epoch": 2.94047776164709, "percentage": 58.81, "elapsed_time": "0:32:38", "remaining_time": "0:22:51", "throughput": 5601.3, "total_tokens": 10968112} +{"current_steps": 22285, "total_steps": 37885, "loss": 0.0023, "lr": 8.670686085998702e-07, "epoch": 2.941137653424838, "percentage": 58.82, "elapsed_time": "0:32:38", "remaining_time": "0:22:50", "throughput": 5601.61, "total_tokens": 10970544} +{"current_steps": 22290, "total_steps": 37885, "loss": 0.0015, "lr": 8.666120131823499e-07, "epoch": 2.941797545202587, "percentage": 58.84, "elapsed_time": "0:32:38", "remaining_time": "0:22:50", "throughput": 5601.93, "total_tokens": 10973040} +{"current_steps": 22295, "total_steps": 37885, "loss": 0.1724, "lr": 8.661554460754631e-07, "epoch": 2.942457436980335, "percentage": 58.85, "elapsed_time": "0:32:39", "remaining_time": "0:22:49", "throughput": 5602.38, "total_tokens": 10975792} +{"current_steps": 22300, "total_steps": 37885, "loss": 0.1864, "lr": 8.656989073761144e-07, "epoch": 2.943117328758084, "percentage": 58.86, "elapsed_time": "0:32:39", "remaining_time": "0:22:49", "throughput": 5602.77, "total_tokens": 10978416} +{"current_steps": 22305, "total_steps": 37885, "loss": 0.0476, "lr": 8.652423971811992e-07, "epoch": 2.943777220535832, "percentage": 58.88, "elapsed_time": "0:32:39", "remaining_time": "0:22:48", "throughput": 5603.29, "total_tokens": 10981296} +{"current_steps": 22310, "total_steps": 37885, "loss": 0.1013, "lr": 8.647859155876103e-07, "epoch": 2.9444371123135804, "percentage": 58.89, "elapsed_time": "0:32:40", "remaining_time": "0:22:48", "throughput": 5603.59, "total_tokens": 10983728} +{"current_steps": 22315, "total_steps": 37885, "loss": 0.0019, "lr": 8.643294626922314e-07, "epoch": 2.945097004091329, "percentage": 58.9, "elapsed_time": "0:32:40", "remaining_time": "0:22:47", "throughput": 5603.98, "total_tokens": 10986352} +{"current_steps": 22320, "total_steps": 37885, "loss": 0.0014, "lr": 8.638730385919411e-07, "epoch": 2.9457568958690774, "percentage": 58.92, "elapsed_time": "0:32:40", "remaining_time": "0:22:47", "throughput": 5604.4, "total_tokens": 10989040} +{"current_steps": 22325, "total_steps": 37885, "loss": 0.0004, "lr": 8.634166433836132e-07, "epoch": 2.946416787646826, "percentage": 58.93, "elapsed_time": "0:32:41", "remaining_time": "0:22:46", "throughput": 5604.65, "total_tokens": 10991344} +{"current_steps": 22330, "total_steps": 37885, "loss": 0.0746, "lr": 8.629602771641131e-07, "epoch": 2.9470766794245744, "percentage": 58.94, "elapsed_time": "0:32:41", "remaining_time": "0:22:46", "throughput": 5604.91, "total_tokens": 10993712} +{"current_steps": 22335, "total_steps": 37885, "loss": 0.1069, "lr": 8.625039400303025e-07, "epoch": 2.9477365712023227, "percentage": 58.95, "elapsed_time": "0:32:41", "remaining_time": "0:22:45", "throughput": 5605.15, "total_tokens": 10996016} +{"current_steps": 22340, "total_steps": 37885, "loss": 0.1457, "lr": 8.620476320790346e-07, "epoch": 2.9483964629800714, "percentage": 58.97, "elapsed_time": "0:32:42", "remaining_time": "0:22:45", "throughput": 5605.49, "total_tokens": 10998512} +{"current_steps": 22345, "total_steps": 37885, "loss": 0.0385, "lr": 8.615913534071577e-07, "epoch": 2.9490563547578197, "percentage": 58.98, "elapsed_time": "0:32:42", "remaining_time": "0:22:44", "throughput": 5605.9, "total_tokens": 11001200} +{"current_steps": 22350, "total_steps": 37885, "loss": 0.0011, "lr": 8.61135104111514e-07, "epoch": 2.9497162465355684, "percentage": 58.99, "elapsed_time": "0:32:42", "remaining_time": "0:22:44", "throughput": 5606.19, "total_tokens": 11003632} +{"current_steps": 22355, "total_steps": 37885, "loss": 0.0005, "lr": 8.606788842889387e-07, "epoch": 2.9503761383133167, "percentage": 59.01, "elapsed_time": "0:32:43", "remaining_time": "0:22:43", "throughput": 5606.43, "total_tokens": 11005936} +{"current_steps": 22360, "total_steps": 37885, "loss": 0.0006, "lr": 8.602226940362615e-07, "epoch": 2.951036030091065, "percentage": 59.02, "elapsed_time": "0:32:43", "remaining_time": "0:22:43", "throughput": 5606.85, "total_tokens": 11008624} +{"current_steps": 22365, "total_steps": 37885, "loss": 0.0893, "lr": 8.59766533450305e-07, "epoch": 2.9516959218688132, "percentage": 59.03, "elapsed_time": "0:32:43", "remaining_time": "0:22:42", "throughput": 5607.12, "total_tokens": 11010992} +{"current_steps": 22370, "total_steps": 37885, "loss": 0.2024, "lr": 8.593104026278866e-07, "epoch": 2.952355813646562, "percentage": 59.05, "elapsed_time": "0:32:44", "remaining_time": "0:22:42", "throughput": 5607.54, "total_tokens": 11013680} +{"current_steps": 22375, "total_steps": 37885, "loss": 0.0256, "lr": 8.588543016658164e-07, "epoch": 2.9530157054243107, "percentage": 59.06, "elapsed_time": "0:32:44", "remaining_time": "0:22:41", "throughput": 5607.78, "total_tokens": 11015984} +{"current_steps": 22380, "total_steps": 37885, "loss": 0.0559, "lr": 8.583982306608984e-07, "epoch": 2.953675597202059, "percentage": 59.07, "elapsed_time": "0:32:44", "remaining_time": "0:22:41", "throughput": 5607.98, "total_tokens": 11018224} +{"current_steps": 22385, "total_steps": 37885, "loss": 0.0007, "lr": 8.579421897099307e-07, "epoch": 2.9543354889798072, "percentage": 59.09, "elapsed_time": "0:32:45", "remaining_time": "0:22:40", "throughput": 5608.21, "total_tokens": 11020528} +{"current_steps": 22390, "total_steps": 37885, "loss": 0.0417, "lr": 8.574861789097043e-07, "epoch": 2.9549953807575555, "percentage": 59.1, "elapsed_time": "0:32:45", "remaining_time": "0:22:40", "throughput": 5608.57, "total_tokens": 11023088} +{"current_steps": 22395, "total_steps": 37885, "loss": 0.0681, "lr": 8.570301983570048e-07, "epoch": 2.9556552725353042, "percentage": 59.11, "elapsed_time": "0:32:45", "remaining_time": "0:22:39", "throughput": 5609.02, "total_tokens": 11025840} +{"current_steps": 22400, "total_steps": 37885, "loss": 0.0693, "lr": 8.565742481486102e-07, "epoch": 2.9563151643130525, "percentage": 59.13, "elapsed_time": "0:32:46", "remaining_time": "0:22:39", "throughput": 5609.19, "total_tokens": 11028016} +{"current_steps": 22405, "total_steps": 37885, "loss": 0.0002, "lr": 8.561183283812928e-07, "epoch": 2.9569750560908012, "percentage": 59.14, "elapsed_time": "0:32:46", "remaining_time": "0:22:38", "throughput": 5609.37, "total_tokens": 11030192} +{"current_steps": 22410, "total_steps": 37885, "loss": 0.0005, "lr": 8.556624391518182e-07, "epoch": 2.9576349478685495, "percentage": 59.15, "elapsed_time": "0:32:46", "remaining_time": "0:22:38", "throughput": 5609.7, "total_tokens": 11032688} +{"current_steps": 22415, "total_steps": 37885, "loss": 0.0344, "lr": 8.552065805569457e-07, "epoch": 2.958294839646298, "percentage": 59.17, "elapsed_time": "0:32:47", "remaining_time": "0:22:37", "throughput": 5610.06, "total_tokens": 11035248} +{"current_steps": 22420, "total_steps": 37885, "loss": 0.0848, "lr": 8.547507526934281e-07, "epoch": 2.9589547314240465, "percentage": 59.18, "elapsed_time": "0:32:47", "remaining_time": "0:22:37", "throughput": 5610.4, "total_tokens": 11037808} +{"current_steps": 22425, "total_steps": 37885, "loss": 0.0006, "lr": 8.542949556580114e-07, "epoch": 2.959614623201795, "percentage": 59.19, "elapsed_time": "0:32:47", "remaining_time": "0:22:36", "throughput": 5610.57, "total_tokens": 11039984} +{"current_steps": 22430, "total_steps": 37885, "loss": 0.0662, "lr": 8.538391895474353e-07, "epoch": 2.9602745149795435, "percentage": 59.21, "elapsed_time": "0:32:48", "remaining_time": "0:22:36", "throughput": 5610.94, "total_tokens": 11042544} +{"current_steps": 22435, "total_steps": 37885, "loss": 0.0357, "lr": 8.533834544584327e-07, "epoch": 2.960934406757292, "percentage": 59.22, "elapsed_time": "0:32:48", "remaining_time": "0:22:35", "throughput": 5611.33, "total_tokens": 11045168} +{"current_steps": 22440, "total_steps": 37885, "loss": 0.043, "lr": 8.529277504877301e-07, "epoch": 2.96159429853504, "percentage": 59.23, "elapsed_time": "0:32:48", "remaining_time": "0:22:35", "throughput": 5611.72, "total_tokens": 11047792} +{"current_steps": 22445, "total_steps": 37885, "loss": 0.1004, "lr": 8.524720777320476e-07, "epoch": 2.962254190312789, "percentage": 59.25, "elapsed_time": "0:32:49", "remaining_time": "0:22:34", "throughput": 5611.98, "total_tokens": 11050160} +{"current_steps": 22450, "total_steps": 37885, "loss": 0.0013, "lr": 8.520164362880986e-07, "epoch": 2.962914082090537, "percentage": 59.26, "elapsed_time": "0:32:49", "remaining_time": "0:22:33", "throughput": 5612.35, "total_tokens": 11052720} +{"current_steps": 22455, "total_steps": 37885, "loss": 0.0006, "lr": 8.515608262525886e-07, "epoch": 2.963573973868286, "percentage": 59.27, "elapsed_time": "0:32:49", "remaining_time": "0:22:33", "throughput": 5612.55, "total_tokens": 11054960} +{"current_steps": 22460, "total_steps": 37885, "loss": 0.0002, "lr": 8.511052477222189e-07, "epoch": 2.964233865646034, "percentage": 59.28, "elapsed_time": "0:32:50", "remaining_time": "0:22:32", "throughput": 5613.0, "total_tokens": 11057712} +{"current_steps": 22465, "total_steps": 37885, "loss": 0.0526, "lr": 8.50649700793682e-07, "epoch": 2.9648937574237824, "percentage": 59.3, "elapsed_time": "0:32:50", "remaining_time": "0:22:32", "throughput": 5613.42, "total_tokens": 11060400} +{"current_steps": 22470, "total_steps": 37885, "loss": 0.0001, "lr": 8.501941855636645e-07, "epoch": 2.965553649201531, "percentage": 59.31, "elapsed_time": "0:32:50", "remaining_time": "0:22:31", "throughput": 5613.87, "total_tokens": 11063152} +{"current_steps": 22475, "total_steps": 37885, "loss": 0.0368, "lr": 8.497387021288468e-07, "epoch": 2.9662135409792794, "percentage": 59.32, "elapsed_time": "0:32:51", "remaining_time": "0:22:31", "throughput": 5614.31, "total_tokens": 11065904} +{"current_steps": 22480, "total_steps": 37885, "loss": 0.0007, "lr": 8.492832505859007e-07, "epoch": 2.966873432757028, "percentage": 59.34, "elapsed_time": "0:32:51", "remaining_time": "0:22:30", "throughput": 5614.56, "total_tokens": 11068272} +{"current_steps": 22485, "total_steps": 37885, "loss": 0.0004, "lr": 8.488278310314939e-07, "epoch": 2.9675333245347764, "percentage": 59.35, "elapsed_time": "0:32:51", "remaining_time": "0:22:30", "throughput": 5614.83, "total_tokens": 11070640} +{"current_steps": 22490, "total_steps": 37885, "loss": 0.0015, "lr": 8.483724435622847e-07, "epoch": 2.9681932163125246, "percentage": 59.36, "elapsed_time": "0:32:52", "remaining_time": "0:22:29", "throughput": 5615.16, "total_tokens": 11073136} +{"current_steps": 22495, "total_steps": 37885, "loss": 0.0001, "lr": 8.479170882749269e-07, "epoch": 2.968853108090273, "percentage": 59.38, "elapsed_time": "0:32:52", "remaining_time": "0:22:29", "throughput": 5615.6, "total_tokens": 11075888} +{"current_steps": 22500, "total_steps": 37885, "loss": 0.0995, "lr": 8.474617652660657e-07, "epoch": 2.9695129998680216, "percentage": 59.39, "elapsed_time": "0:32:52", "remaining_time": "0:22:28", "throughput": 5615.97, "total_tokens": 11078448} +{"current_steps": 22505, "total_steps": 37885, "loss": 0.0001, "lr": 8.470064746323399e-07, "epoch": 2.9701728916457704, "percentage": 59.4, "elapsed_time": "0:32:52", "remaining_time": "0:22:28", "throughput": 5616.36, "total_tokens": 11081072} +{"current_steps": 22510, "total_steps": 37885, "loss": 0.0007, "lr": 8.465512164703823e-07, "epoch": 2.9708327834235186, "percentage": 59.42, "elapsed_time": "0:32:53", "remaining_time": "0:22:27", "throughput": 5616.65, "total_tokens": 11083504} +{"current_steps": 22515, "total_steps": 37885, "loss": 0.0006, "lr": 8.460959908768173e-07, "epoch": 2.971492675201267, "percentage": 59.43, "elapsed_time": "0:32:53", "remaining_time": "0:22:27", "throughput": 5616.92, "total_tokens": 11085872} +{"current_steps": 22520, "total_steps": 37885, "loss": 0.0751, "lr": 8.456407979482645e-07, "epoch": 2.972152566979015, "percentage": 59.44, "elapsed_time": "0:32:53", "remaining_time": "0:22:26", "throughput": 5617.24, "total_tokens": 11088368} +{"current_steps": 22525, "total_steps": 37885, "loss": 0.0005, "lr": 8.451856377813342e-07, "epoch": 2.972812458756764, "percentage": 59.46, "elapsed_time": "0:32:54", "remaining_time": "0:22:26", "throughput": 5617.58, "total_tokens": 11090864} +{"current_steps": 22530, "total_steps": 37885, "loss": 0.0004, "lr": 8.44730510472631e-07, "epoch": 2.973472350534512, "percentage": 59.47, "elapsed_time": "0:32:54", "remaining_time": "0:22:25", "throughput": 5617.96, "total_tokens": 11093488} +{"current_steps": 22535, "total_steps": 37885, "loss": 0.0001, "lr": 8.442754161187528e-07, "epoch": 2.974132242312261, "percentage": 59.48, "elapsed_time": "0:32:54", "remaining_time": "0:22:25", "throughput": 5618.19, "total_tokens": 11095792} +{"current_steps": 22540, "total_steps": 37885, "loss": 0.0001, "lr": 8.438203548162898e-07, "epoch": 2.974792134090009, "percentage": 59.5, "elapsed_time": "0:32:55", "remaining_time": "0:22:24", "throughput": 5618.52, "total_tokens": 11098288} +{"current_steps": 22545, "total_steps": 37885, "loss": 0.0257, "lr": 8.433653266618255e-07, "epoch": 2.9754520258677575, "percentage": 59.51, "elapsed_time": "0:32:55", "remaining_time": "0:22:24", "throughput": 5618.74, "total_tokens": 11100528} +{"current_steps": 22550, "total_steps": 37885, "loss": 0.0707, "lr": 8.429103317519366e-07, "epoch": 2.976111917645506, "percentage": 59.52, "elapsed_time": "0:32:55", "remaining_time": "0:22:23", "throughput": 5619.12, "total_tokens": 11103152} +{"current_steps": 22555, "total_steps": 37885, "loss": 0.0073, "lr": 8.424553701831919e-07, "epoch": 2.9767718094232545, "percentage": 59.54, "elapsed_time": "0:32:56", "remaining_time": "0:22:23", "throughput": 5619.53, "total_tokens": 11105840} +{"current_steps": 22560, "total_steps": 37885, "loss": 0.0646, "lr": 8.420004420521542e-07, "epoch": 2.977431701201003, "percentage": 59.55, "elapsed_time": "0:32:56", "remaining_time": "0:22:22", "throughput": 5619.68, "total_tokens": 11107952} +{"current_steps": 22565, "total_steps": 37885, "loss": 0.0, "lr": 8.415455474553784e-07, "epoch": 2.9780915929787515, "percentage": 59.56, "elapsed_time": "0:32:56", "remaining_time": "0:22:22", "throughput": 5619.98, "total_tokens": 11110384} +{"current_steps": 22570, "total_steps": 37885, "loss": 0.0783, "lr": 8.41090686489413e-07, "epoch": 2.9787514847564998, "percentage": 59.58, "elapsed_time": "0:32:57", "remaining_time": "0:22:21", "throughput": 5620.34, "total_tokens": 11112944} +{"current_steps": 22575, "total_steps": 37885, "loss": 0.1547, "lr": 8.406358592507985e-07, "epoch": 2.9794113765342485, "percentage": 59.59, "elapsed_time": "0:32:57", "remaining_time": "0:22:21", "throughput": 5620.61, "total_tokens": 11115312} +{"current_steps": 22580, "total_steps": 37885, "loss": 0.1548, "lr": 8.401810658360686e-07, "epoch": 2.9800712683119968, "percentage": 59.6, "elapsed_time": "0:32:57", "remaining_time": "0:22:20", "throughput": 5620.96, "total_tokens": 11117872} +{"current_steps": 22585, "total_steps": 37885, "loss": 0.0782, "lr": 8.397263063417506e-07, "epoch": 2.9807311600897455, "percentage": 59.61, "elapsed_time": "0:32:58", "remaining_time": "0:22:20", "throughput": 5621.41, "total_tokens": 11120624} +{"current_steps": 22590, "total_steps": 37885, "loss": 0.0001, "lr": 8.39271580864363e-07, "epoch": 2.9813910518674938, "percentage": 59.63, "elapsed_time": "0:32:58", "remaining_time": "0:22:19", "throughput": 5621.74, "total_tokens": 11123120} +{"current_steps": 22595, "total_steps": 37885, "loss": 0.071, "lr": 8.388168895004189e-07, "epoch": 2.982050943645242, "percentage": 59.64, "elapsed_time": "0:32:58", "remaining_time": "0:22:19", "throughput": 5622.04, "total_tokens": 11125552} +{"current_steps": 22600, "total_steps": 37885, "loss": 0.1256, "lr": 8.383622323464226e-07, "epoch": 2.9827108354229908, "percentage": 59.65, "elapsed_time": "0:32:59", "remaining_time": "0:22:18", "throughput": 5622.42, "total_tokens": 11128176} +{"current_steps": 22605, "total_steps": 37885, "loss": 0.0751, "lr": 8.379076094988718e-07, "epoch": 2.983370727200739, "percentage": 59.67, "elapsed_time": "0:32:59", "remaining_time": "0:22:18", "throughput": 5622.66, "total_tokens": 11130480} +{"current_steps": 22610, "total_steps": 37885, "loss": 0.1028, "lr": 8.374530210542575e-07, "epoch": 2.9840306189784878, "percentage": 59.68, "elapsed_time": "0:32:59", "remaining_time": "0:22:17", "throughput": 5622.94, "total_tokens": 11132848} +{"current_steps": 22615, "total_steps": 37885, "loss": 0.0007, "lr": 8.369984671090621e-07, "epoch": 2.984690510756236, "percentage": 59.69, "elapsed_time": "0:33:00", "remaining_time": "0:22:17", "throughput": 5623.19, "total_tokens": 11135152} +{"current_steps": 22620, "total_steps": 37885, "loss": 0.019, "lr": 8.365439477597619e-07, "epoch": 2.9853504025339843, "percentage": 59.71, "elapsed_time": "0:33:00", "remaining_time": "0:22:16", "throughput": 5623.51, "total_tokens": 11137648} +{"current_steps": 22625, "total_steps": 37885, "loss": 0.0387, "lr": 8.360894631028254e-07, "epoch": 2.986010294311733, "percentage": 59.72, "elapsed_time": "0:33:00", "remaining_time": "0:22:16", "throughput": 5623.7, "total_tokens": 11139888} +{"current_steps": 22630, "total_steps": 37885, "loss": 0.0004, "lr": 8.356350132347127e-07, "epoch": 2.9866701860894813, "percentage": 59.73, "elapsed_time": "0:33:01", "remaining_time": "0:22:15", "throughput": 5624.09, "total_tokens": 11142512} +{"current_steps": 22635, "total_steps": 37885, "loss": 0.0013, "lr": 8.351805982518788e-07, "epoch": 2.98733007786723, "percentage": 59.75, "elapsed_time": "0:33:01", "remaining_time": "0:22:15", "throughput": 5624.33, "total_tokens": 11144816} +{"current_steps": 22640, "total_steps": 37885, "loss": 0.0283, "lr": 8.347262182507688e-07, "epoch": 2.9879899696449783, "percentage": 59.76, "elapsed_time": "0:33:01", "remaining_time": "0:22:14", "throughput": 5624.66, "total_tokens": 11147312} +{"current_steps": 22645, "total_steps": 37885, "loss": 0.0003, "lr": 8.342718733278228e-07, "epoch": 2.9886498614227266, "percentage": 59.77, "elapsed_time": "0:33:02", "remaining_time": "0:22:14", "throughput": 5624.86, "total_tokens": 11149552} +{"current_steps": 22650, "total_steps": 37885, "loss": 0.0004, "lr": 8.338175635794713e-07, "epoch": 2.989309753200475, "percentage": 59.79, "elapsed_time": "0:33:02", "remaining_time": "0:22:13", "throughput": 5625.16, "total_tokens": 11151984} +{"current_steps": 22655, "total_steps": 37885, "loss": 0.1013, "lr": 8.333632891021383e-07, "epoch": 2.9899696449782236, "percentage": 59.8, "elapsed_time": "0:33:02", "remaining_time": "0:22:12", "throughput": 5625.52, "total_tokens": 11154544} +{"current_steps": 22660, "total_steps": 37885, "loss": 0.0648, "lr": 8.32909049992241e-07, "epoch": 2.990629536755972, "percentage": 59.81, "elapsed_time": "0:33:03", "remaining_time": "0:22:12", "throughput": 5625.84, "total_tokens": 11157040} +{"current_steps": 22665, "total_steps": 37885, "loss": 0.1119, "lr": 8.324548463461871e-07, "epoch": 2.9912894285337206, "percentage": 59.83, "elapsed_time": "0:33:03", "remaining_time": "0:22:11", "throughput": 5626.11, "total_tokens": 11159408} +{"current_steps": 22670, "total_steps": 37885, "loss": 0.0001, "lr": 8.320006782603797e-07, "epoch": 2.991949320311469, "percentage": 59.84, "elapsed_time": "0:33:03", "remaining_time": "0:22:11", "throughput": 5626.46, "total_tokens": 11161968} +{"current_steps": 22675, "total_steps": 37885, "loss": 0.0006, "lr": 8.315465458312114e-07, "epoch": 2.992609212089217, "percentage": 59.85, "elapsed_time": "0:33:04", "remaining_time": "0:22:10", "throughput": 5626.73, "total_tokens": 11164336} +{"current_steps": 22680, "total_steps": 37885, "loss": 0.0551, "lr": 8.310924491550688e-07, "epoch": 2.993269103866966, "percentage": 59.87, "elapsed_time": "0:33:04", "remaining_time": "0:22:10", "throughput": 5627.06, "total_tokens": 11166832} +{"current_steps": 22685, "total_steps": 37885, "loss": 0.0014, "lr": 8.306383883283308e-07, "epoch": 2.993928995644714, "percentage": 59.88, "elapsed_time": "0:33:04", "remaining_time": "0:22:09", "throughput": 5627.18, "total_tokens": 11168880} +{"current_steps": 22690, "total_steps": 37885, "loss": 0.0004, "lr": 8.301843634473683e-07, "epoch": 2.994588887422463, "percentage": 59.89, "elapsed_time": "0:33:05", "remaining_time": "0:22:09", "throughput": 5627.41, "total_tokens": 11171184} +{"current_steps": 22695, "total_steps": 37885, "loss": 0.0201, "lr": 8.297303746085452e-07, "epoch": 2.995248779200211, "percentage": 59.9, "elapsed_time": "0:33:05", "remaining_time": "0:22:08", "throughput": 5627.74, "total_tokens": 11173680} +{"current_steps": 22700, "total_steps": 37885, "loss": 0.0418, "lr": 8.292764219082168e-07, "epoch": 2.9959086709779594, "percentage": 59.92, "elapsed_time": "0:33:05", "remaining_time": "0:22:08", "throughput": 5628.09, "total_tokens": 11176240} +{"current_steps": 22705, "total_steps": 37885, "loss": 0.0009, "lr": 8.28822505442732e-07, "epoch": 2.996568562755708, "percentage": 59.93, "elapsed_time": "0:33:06", "remaining_time": "0:22:07", "throughput": 5628.36, "total_tokens": 11178608} +{"current_steps": 22710, "total_steps": 37885, "loss": 0.0041, "lr": 8.283686253084306e-07, "epoch": 2.9972284545334564, "percentage": 59.94, "elapsed_time": "0:33:06", "remaining_time": "0:22:07", "throughput": 5628.8, "total_tokens": 11181360} +{"current_steps": 22715, "total_steps": 37885, "loss": 0.0644, "lr": 8.279147816016455e-07, "epoch": 2.997888346311205, "percentage": 59.96, "elapsed_time": "0:33:06", "remaining_time": "0:22:06", "throughput": 5629.13, "total_tokens": 11183856} +{"current_steps": 22720, "total_steps": 37885, "loss": 0.0427, "lr": 8.274609744187021e-07, "epoch": 2.9985482380889534, "percentage": 59.97, "elapsed_time": "0:33:07", "remaining_time": "0:22:06", "throughput": 5629.57, "total_tokens": 11186608} +{"current_steps": 22725, "total_steps": 37885, "loss": 0.0002, "lr": 8.270072038559172e-07, "epoch": 2.9992081298667017, "percentage": 59.98, "elapsed_time": "0:33:07", "remaining_time": "0:22:05", "throughput": 5629.8, "total_tokens": 11188912} +{"current_steps": 22730, "total_steps": 37885, "loss": 0.0001, "lr": 8.265534700096008e-07, "epoch": 2.9998680216444504, "percentage": 60.0, "elapsed_time": "0:33:07", "remaining_time": "0:22:05", "throughput": 5630.14, "total_tokens": 11191408} +{"current_steps": 22735, "total_steps": 37885, "loss": 0.0006, "lr": 8.260997729760544e-07, "epoch": 3.0005279134221987, "percentage": 60.01, "elapsed_time": "0:33:08", "remaining_time": "0:22:04", "throughput": 5630.07, "total_tokens": 11193728} +{"current_steps": 22740, "total_steps": 37885, "loss": 0.0352, "lr": 8.256461128515717e-07, "epoch": 3.001187805199947, "percentage": 60.02, "elapsed_time": "0:33:08", "remaining_time": "0:22:04", "throughput": 5630.33, "total_tokens": 11196096} +{"current_steps": 22740, "total_steps": 37885, "eval_loss": 0.14833371341228485, "epoch": 3.001187805199947, "percentage": 60.02, "elapsed_time": "0:33:16", "remaining_time": "0:22:09", "throughput": 5607.95, "total_tokens": 11196096} +{"current_steps": 22745, "total_steps": 37885, "loss": 0.0003, "lr": 8.251924897324392e-07, "epoch": 3.0018476969776957, "percentage": 60.04, "elapsed_time": "0:33:48", "remaining_time": "0:22:30", "throughput": 5521.45, "total_tokens": 11198656} +{"current_steps": 22750, "total_steps": 37885, "loss": 0.0, "lr": 8.247389037149346e-07, "epoch": 3.002507588755444, "percentage": 60.05, "elapsed_time": "0:33:48", "remaining_time": "0:22:29", "throughput": 5521.75, "total_tokens": 11201088} +{"current_steps": 22755, "total_steps": 37885, "loss": 0.0, "lr": 8.242853548953288e-07, "epoch": 3.0031674805331927, "percentage": 60.06, "elapsed_time": "0:33:48", "remaining_time": "0:22:29", "throughput": 5522.12, "total_tokens": 11203648} +{"current_steps": 22760, "total_steps": 37885, "loss": 0.0, "lr": 8.238318433698841e-07, "epoch": 3.003827372310941, "percentage": 60.08, "elapsed_time": "0:33:49", "remaining_time": "0:22:28", "throughput": 5522.57, "total_tokens": 11206400} +{"current_steps": 22765, "total_steps": 37885, "loss": 0.0, "lr": 8.233783692348546e-07, "epoch": 3.0044872640886893, "percentage": 60.09, "elapsed_time": "0:33:49", "remaining_time": "0:22:27", "throughput": 5522.9, "total_tokens": 11208896} +{"current_steps": 22770, "total_steps": 37885, "loss": 0.0016, "lr": 8.229249325864874e-07, "epoch": 3.005147155866438, "percentage": 60.1, "elapsed_time": "0:33:49", "remaining_time": "0:22:27", "throughput": 5523.21, "total_tokens": 11211328} +{"current_steps": 22775, "total_steps": 37885, "loss": 0.0581, "lr": 8.224715335210208e-07, "epoch": 3.0058070476441863, "percentage": 60.12, "elapsed_time": "0:33:50", "remaining_time": "0:22:26", "throughput": 5523.66, "total_tokens": 11214080} +{"current_steps": 22780, "total_steps": 37885, "loss": 0.0001, "lr": 8.22018172134686e-07, "epoch": 3.006466939421935, "percentage": 60.13, "elapsed_time": "0:33:50", "remaining_time": "0:22:26", "throughput": 5523.88, "total_tokens": 11216320} +{"current_steps": 22785, "total_steps": 37885, "loss": 0.0502, "lr": 8.215648485237054e-07, "epoch": 3.0071268311996833, "percentage": 60.14, "elapsed_time": "0:33:50", "remaining_time": "0:22:25", "throughput": 5524.24, "total_tokens": 11218880} +{"current_steps": 22790, "total_steps": 37885, "loss": 0.0004, "lr": 8.211115627842931e-07, "epoch": 3.0077867229774315, "percentage": 60.16, "elapsed_time": "0:33:51", "remaining_time": "0:22:25", "throughput": 5524.54, "total_tokens": 11221312} +{"current_steps": 22795, "total_steps": 37885, "loss": 0.0, "lr": 8.206583150126564e-07, "epoch": 3.0084466147551803, "percentage": 60.17, "elapsed_time": "0:33:51", "remaining_time": "0:22:24", "throughput": 5524.73, "total_tokens": 11223488} +{"current_steps": 22800, "total_steps": 37885, "loss": 0.0001, "lr": 8.202051053049936e-07, "epoch": 3.0091065065329285, "percentage": 60.18, "elapsed_time": "0:33:51", "remaining_time": "0:22:24", "throughput": 5524.94, "total_tokens": 11225728} +{"current_steps": 22805, "total_steps": 37885, "loss": 0.0006, "lr": 8.197519337574953e-07, "epoch": 3.009766398310677, "percentage": 60.2, "elapsed_time": "0:33:52", "remaining_time": "0:22:23", "throughput": 5525.12, "total_tokens": 11227904} +{"current_steps": 22810, "total_steps": 37885, "loss": 0.0, "lr": 8.192988004663442e-07, "epoch": 3.0104262900884255, "percentage": 60.21, "elapsed_time": "0:33:52", "remaining_time": "0:22:23", "throughput": 5525.48, "total_tokens": 11230464} +{"current_steps": 22815, "total_steps": 37885, "loss": 0.0004, "lr": 8.188457055277133e-07, "epoch": 3.011086181866174, "percentage": 60.22, "elapsed_time": "0:33:52", "remaining_time": "0:22:22", "throughput": 5525.81, "total_tokens": 11232960} +{"current_steps": 22820, "total_steps": 37885, "loss": 0.0001, "lr": 8.183926490377703e-07, "epoch": 3.0117460736439225, "percentage": 60.23, "elapsed_time": "0:33:53", "remaining_time": "0:22:22", "throughput": 5526.14, "total_tokens": 11235456} +{"current_steps": 22825, "total_steps": 37885, "loss": 0.0007, "lr": 8.179396310926719e-07, "epoch": 3.012405965421671, "percentage": 60.25, "elapsed_time": "0:33:53", "remaining_time": "0:22:21", "throughput": 5526.45, "total_tokens": 11237888} +{"current_steps": 22830, "total_steps": 37885, "loss": 0.0003, "lr": 8.17486651788569e-07, "epoch": 3.013065857199419, "percentage": 60.26, "elapsed_time": "0:33:53", "remaining_time": "0:22:21", "throughput": 5526.87, "total_tokens": 11240576} +{"current_steps": 22835, "total_steps": 37885, "loss": 0.0001, "lr": 8.170337112216023e-07, "epoch": 3.013725748977168, "percentage": 60.27, "elapsed_time": "0:33:54", "remaining_time": "0:22:20", "throughput": 5527.05, "total_tokens": 11242752} +{"current_steps": 22840, "total_steps": 37885, "loss": 0.0, "lr": 8.165808094879054e-07, "epoch": 3.014385640754916, "percentage": 60.29, "elapsed_time": "0:33:54", "remaining_time": "0:22:20", "throughput": 5527.36, "total_tokens": 11245184} +{"current_steps": 22845, "total_steps": 37885, "loss": 0.0, "lr": 8.161279466836036e-07, "epoch": 3.015045532532665, "percentage": 60.3, "elapsed_time": "0:33:54", "remaining_time": "0:22:19", "throughput": 5527.63, "total_tokens": 11247552} +{"current_steps": 22850, "total_steps": 37885, "loss": 0.0005, "lr": 8.156751229048132e-07, "epoch": 3.015705424310413, "percentage": 60.31, "elapsed_time": "0:33:55", "remaining_time": "0:22:19", "throughput": 5527.88, "total_tokens": 11249856} +{"current_steps": 22855, "total_steps": 37885, "loss": 0.0, "lr": 8.152223382476438e-07, "epoch": 3.0163653160881614, "percentage": 60.33, "elapsed_time": "0:33:55", "remaining_time": "0:22:18", "throughput": 5528.32, "total_tokens": 11252608} +{"current_steps": 22860, "total_steps": 37885, "loss": 0.0, "lr": 8.14769592808195e-07, "epoch": 3.01702520786591, "percentage": 60.34, "elapsed_time": "0:33:55", "remaining_time": "0:22:18", "throughput": 5528.61, "total_tokens": 11255040} +{"current_steps": 22865, "total_steps": 37885, "loss": 0.0565, "lr": 8.143168866825583e-07, "epoch": 3.0176850996436584, "percentage": 60.35, "elapsed_time": "0:33:56", "remaining_time": "0:22:17", "throughput": 5528.97, "total_tokens": 11257600} +{"current_steps": 22870, "total_steps": 37885, "loss": 0.0, "lr": 8.138642199668183e-07, "epoch": 3.018344991421407, "percentage": 60.37, "elapsed_time": "0:33:56", "remaining_time": "0:22:16", "throughput": 5529.21, "total_tokens": 11259904} +{"current_steps": 22875, "total_steps": 37885, "loss": 0.1032, "lr": 8.134115927570493e-07, "epoch": 3.0190048831991554, "percentage": 60.38, "elapsed_time": "0:33:56", "remaining_time": "0:22:16", "throughput": 5529.48, "total_tokens": 11262272} +{"current_steps": 22880, "total_steps": 37885, "loss": 0.0014, "lr": 8.129590051493189e-07, "epoch": 3.0196647749769037, "percentage": 60.39, "elapsed_time": "0:33:57", "remaining_time": "0:22:15", "throughput": 5529.69, "total_tokens": 11264512} +{"current_steps": 22885, "total_steps": 37885, "loss": 0.0036, "lr": 8.125064572396851e-07, "epoch": 3.0203246667546524, "percentage": 60.41, "elapsed_time": "0:33:57", "remaining_time": "0:22:15", "throughput": 5530.02, "total_tokens": 11267008} +{"current_steps": 22890, "total_steps": 37885, "loss": 0.0001, "lr": 8.12053949124198e-07, "epoch": 3.0209845585324007, "percentage": 60.42, "elapsed_time": "0:33:57", "remaining_time": "0:22:14", "throughput": 5530.38, "total_tokens": 11269568} +{"current_steps": 22895, "total_steps": 37885, "loss": 0.0002, "lr": 8.116014808988993e-07, "epoch": 3.021644450310149, "percentage": 60.43, "elapsed_time": "0:33:58", "remaining_time": "0:22:14", "throughput": 5530.76, "total_tokens": 11272192} +{"current_steps": 22900, "total_steps": 37885, "loss": 0.0002, "lr": 8.111490526598217e-07, "epoch": 3.0223043420878977, "percentage": 60.45, "elapsed_time": "0:33:58", "remaining_time": "0:22:13", "throughput": 5531.23, "total_tokens": 11275008} +{"current_steps": 22905, "total_steps": 37885, "loss": 0.0367, "lr": 8.106966645029905e-07, "epoch": 3.022964233865646, "percentage": 60.46, "elapsed_time": "0:33:58", "remaining_time": "0:22:13", "throughput": 5531.49, "total_tokens": 11277376} +{"current_steps": 22910, "total_steps": 37885, "loss": 0.0626, "lr": 8.102443165244213e-07, "epoch": 3.0236241256433947, "percentage": 60.47, "elapsed_time": "0:33:59", "remaining_time": "0:22:12", "throughput": 5531.84, "total_tokens": 11279936} +{"current_steps": 22915, "total_steps": 37885, "loss": 0.0, "lr": 8.097920088201216e-07, "epoch": 3.024284017421143, "percentage": 60.49, "elapsed_time": "0:33:59", "remaining_time": "0:22:12", "throughput": 5532.15, "total_tokens": 11282432} +{"current_steps": 22920, "total_steps": 37885, "loss": 0.0323, "lr": 8.09339741486091e-07, "epoch": 3.0249439091988912, "percentage": 60.5, "elapsed_time": "0:33:59", "remaining_time": "0:22:11", "throughput": 5532.59, "total_tokens": 11285184} +{"current_steps": 22925, "total_steps": 37885, "loss": 0.0, "lr": 8.088875146183192e-07, "epoch": 3.02560380097664, "percentage": 60.51, "elapsed_time": "0:34:00", "remaining_time": "0:22:11", "throughput": 5532.95, "total_tokens": 11287744} +{"current_steps": 22930, "total_steps": 37885, "loss": 0.0, "lr": 8.084353283127889e-07, "epoch": 3.0262636927543882, "percentage": 60.53, "elapsed_time": "0:34:00", "remaining_time": "0:22:10", "throughput": 5533.17, "total_tokens": 11289984} +{"current_steps": 22935, "total_steps": 37885, "loss": 0.0457, "lr": 8.079831826654729e-07, "epoch": 3.026923584532137, "percentage": 60.54, "elapsed_time": "0:34:00", "remaining_time": "0:22:10", "throughput": 5533.47, "total_tokens": 11292416} +{"current_steps": 22940, "total_steps": 37885, "loss": 0.0001, "lr": 8.075310777723357e-07, "epoch": 3.0275834763098852, "percentage": 60.55, "elapsed_time": "0:34:01", "remaining_time": "0:22:09", "throughput": 5533.89, "total_tokens": 11295104} +{"current_steps": 22945, "total_steps": 37885, "loss": 0.0, "lr": 8.070790137293338e-07, "epoch": 3.0282433680876335, "percentage": 60.56, "elapsed_time": "0:34:01", "remaining_time": "0:22:09", "throughput": 5534.08, "total_tokens": 11297280} +{"current_steps": 22950, "total_steps": 37885, "loss": 0.0783, "lr": 8.066269906324138e-07, "epoch": 3.0289032598653822, "percentage": 60.58, "elapsed_time": "0:34:01", "remaining_time": "0:22:08", "throughput": 5534.36, "total_tokens": 11299648} +{"current_steps": 22955, "total_steps": 37885, "loss": 0.0002, "lr": 8.061750085775151e-07, "epoch": 3.0295631516431305, "percentage": 60.59, "elapsed_time": "0:34:02", "remaining_time": "0:22:08", "throughput": 5534.66, "total_tokens": 11302080} +{"current_steps": 22960, "total_steps": 37885, "loss": 0.0538, "lr": 8.057230676605673e-07, "epoch": 3.030223043420879, "percentage": 60.6, "elapsed_time": "0:34:02", "remaining_time": "0:22:07", "throughput": 5535.13, "total_tokens": 11304896} +{"current_steps": 22965, "total_steps": 37885, "loss": 0.0, "lr": 8.05271167977491e-07, "epoch": 3.0308829351986275, "percentage": 60.62, "elapsed_time": "0:34:02", "remaining_time": "0:22:07", "throughput": 5535.43, "total_tokens": 11307328} +{"current_steps": 22970, "total_steps": 37885, "loss": 0.0, "lr": 8.048193096241999e-07, "epoch": 3.031542826976376, "percentage": 60.63, "elapsed_time": "0:34:03", "remaining_time": "0:22:06", "throughput": 5535.59, "total_tokens": 11309440} +{"current_steps": 22975, "total_steps": 37885, "loss": 0.0001, "lr": 8.043674926965962e-07, "epoch": 3.0322027187541245, "percentage": 60.64, "elapsed_time": "0:34:03", "remaining_time": "0:22:06", "throughput": 5535.92, "total_tokens": 11311936} +{"current_steps": 22980, "total_steps": 37885, "loss": 0.0834, "lr": 8.039157172905762e-07, "epoch": 3.032862610531873, "percentage": 60.66, "elapsed_time": "0:34:03", "remaining_time": "0:22:05", "throughput": 5536.31, "total_tokens": 11314560} +{"current_steps": 22985, "total_steps": 37885, "loss": 0.0001, "lr": 8.034639835020251e-07, "epoch": 3.033522502309621, "percentage": 60.67, "elapsed_time": "0:34:04", "remaining_time": "0:22:05", "throughput": 5536.58, "total_tokens": 11316992} +{"current_steps": 22990, "total_steps": 37885, "loss": 0.0002, "lr": 8.030122914268198e-07, "epoch": 3.03418239408737, "percentage": 60.68, "elapsed_time": "0:34:04", "remaining_time": "0:22:04", "throughput": 5536.96, "total_tokens": 11319616} +{"current_steps": 22995, "total_steps": 37885, "loss": 0.0005, "lr": 8.025606411608299e-07, "epoch": 3.034842285865118, "percentage": 60.7, "elapsed_time": "0:34:04", "remaining_time": "0:22:04", "throughput": 5537.27, "total_tokens": 11322112} +{"current_steps": 23000, "total_steps": 37885, "loss": 0.0002, "lr": 8.021090327999135e-07, "epoch": 3.035502177642867, "percentage": 60.71, "elapsed_time": "0:34:05", "remaining_time": "0:22:03", "throughput": 5537.53, "total_tokens": 11324480} +{"current_steps": 23005, "total_steps": 37885, "loss": 0.0004, "lr": 8.016574664399225e-07, "epoch": 3.036162069420615, "percentage": 60.72, "elapsed_time": "0:34:05", "remaining_time": "0:22:02", "throughput": 5537.87, "total_tokens": 11327040} +{"current_steps": 23010, "total_steps": 37885, "loss": 0.0001, "lr": 8.012059421766972e-07, "epoch": 3.0368219611983633, "percentage": 60.74, "elapsed_time": "0:34:05", "remaining_time": "0:22:02", "throughput": 5538.14, "total_tokens": 11329408} +{"current_steps": 23015, "total_steps": 37885, "loss": 0.0, "lr": 8.007544601060719e-07, "epoch": 3.037481852976112, "percentage": 60.75, "elapsed_time": "0:34:06", "remaining_time": "0:22:01", "throughput": 5538.52, "total_tokens": 11332032} +{"current_steps": 23020, "total_steps": 37885, "loss": 0.0002, "lr": 8.003030203238694e-07, "epoch": 3.0381417447538603, "percentage": 60.76, "elapsed_time": "0:34:06", "remaining_time": "0:22:01", "throughput": 5538.82, "total_tokens": 11334528} +{"current_steps": 23025, "total_steps": 37885, "loss": 0.0004, "lr": 7.998516229259045e-07, "epoch": 3.0388016365316086, "percentage": 60.78, "elapsed_time": "0:34:06", "remaining_time": "0:22:00", "throughput": 5539.17, "total_tokens": 11337088} +{"current_steps": 23030, "total_steps": 37885, "loss": 0.0, "lr": 7.994002680079835e-07, "epoch": 3.0394615283093573, "percentage": 60.79, "elapsed_time": "0:34:07", "remaining_time": "0:22:00", "throughput": 5539.49, "total_tokens": 11339584} +{"current_steps": 23035, "total_steps": 37885, "loss": 0.0, "lr": 7.989489556659028e-07, "epoch": 3.0401214200871056, "percentage": 60.8, "elapsed_time": "0:34:07", "remaining_time": "0:21:59", "throughput": 5539.84, "total_tokens": 11342144} +{"current_steps": 23040, "total_steps": 37885, "loss": 0.0001, "lr": 7.984976859954506e-07, "epoch": 3.0407813118648543, "percentage": 60.82, "elapsed_time": "0:34:07", "remaining_time": "0:21:59", "throughput": 5540.31, "total_tokens": 11345024} +{"current_steps": 23045, "total_steps": 37885, "loss": 0.0001, "lr": 7.980464590924054e-07, "epoch": 3.0414412036426026, "percentage": 60.83, "elapsed_time": "0:34:08", "remaining_time": "0:21:58", "throughput": 5540.6, "total_tokens": 11347456} +{"current_steps": 23050, "total_steps": 37885, "loss": 0.02, "lr": 7.975952750525366e-07, "epoch": 3.042101095420351, "percentage": 60.84, "elapsed_time": "0:34:08", "remaining_time": "0:21:58", "throughput": 5540.84, "total_tokens": 11349760} +{"current_steps": 23055, "total_steps": 37885, "loss": 0.0, "lr": 7.97144133971605e-07, "epoch": 3.0427609871980996, "percentage": 60.86, "elapsed_time": "0:34:08", "remaining_time": "0:21:57", "throughput": 5541.26, "total_tokens": 11352512} +{"current_steps": 23060, "total_steps": 37885, "loss": 0.0, "lr": 7.966930359453619e-07, "epoch": 3.043420878975848, "percentage": 60.87, "elapsed_time": "0:34:09", "remaining_time": "0:21:57", "throughput": 5541.55, "total_tokens": 11354944} +{"current_steps": 23065, "total_steps": 37885, "loss": 0.0003, "lr": 7.9624198106955e-07, "epoch": 3.0440807707535966, "percentage": 60.88, "elapsed_time": "0:34:09", "remaining_time": "0:21:56", "throughput": 5541.81, "total_tokens": 11357312} +{"current_steps": 23070, "total_steps": 37885, "loss": 0.1689, "lr": 7.957909694399019e-07, "epoch": 3.044740662531345, "percentage": 60.89, "elapsed_time": "0:34:09", "remaining_time": "0:21:56", "throughput": 5542.18, "total_tokens": 11359936} +{"current_steps": 23075, "total_steps": 37885, "loss": 0.0, "lr": 7.953400011521417e-07, "epoch": 3.045400554309093, "percentage": 60.91, "elapsed_time": "0:34:10", "remaining_time": "0:21:55", "throughput": 5542.39, "total_tokens": 11362240} +{"current_steps": 23080, "total_steps": 37885, "loss": 0.0002, "lr": 7.948890763019845e-07, "epoch": 3.046060446086842, "percentage": 60.92, "elapsed_time": "0:34:10", "remaining_time": "0:21:55", "throughput": 5542.64, "total_tokens": 11364608} +{"current_steps": 23085, "total_steps": 37885, "loss": 0.0, "lr": 7.944381949851353e-07, "epoch": 3.04672033786459, "percentage": 60.93, "elapsed_time": "0:34:10", "remaining_time": "0:21:54", "throughput": 5542.9, "total_tokens": 11366976} +{"current_steps": 23090, "total_steps": 37885, "loss": 0.0002, "lr": 7.939873572972908e-07, "epoch": 3.0473802296423385, "percentage": 60.95, "elapsed_time": "0:34:11", "remaining_time": "0:21:54", "throughput": 5543.21, "total_tokens": 11369408} +{"current_steps": 23095, "total_steps": 37885, "loss": 0.0001, "lr": 7.93536563334138e-07, "epoch": 3.048040121420087, "percentage": 60.96, "elapsed_time": "0:34:11", "remaining_time": "0:21:53", "throughput": 5543.53, "total_tokens": 11371904} +{"current_steps": 23100, "total_steps": 37885, "loss": 0.0001, "lr": 7.930858131913541e-07, "epoch": 3.0487000131978355, "percentage": 60.97, "elapsed_time": "0:34:11", "remaining_time": "0:21:53", "throughput": 5543.97, "total_tokens": 11374656} +{"current_steps": 23105, "total_steps": 37885, "loss": 0.0001, "lr": 7.926351069646084e-07, "epoch": 3.049359904975584, "percentage": 60.99, "elapsed_time": "0:34:12", "remaining_time": "0:21:52", "throughput": 5544.14, "total_tokens": 11376832} +{"current_steps": 23110, "total_steps": 37885, "loss": 0.0002, "lr": 7.921844447495594e-07, "epoch": 3.0500197967533325, "percentage": 61.0, "elapsed_time": "0:34:12", "remaining_time": "0:21:52", "throughput": 5544.44, "total_tokens": 11379264} +{"current_steps": 23115, "total_steps": 37885, "loss": 0.0001, "lr": 7.917338266418573e-07, "epoch": 3.0506796885310807, "percentage": 61.01, "elapsed_time": "0:34:12", "remaining_time": "0:21:51", "throughput": 5544.64, "total_tokens": 11381504} +{"current_steps": 23120, "total_steps": 37885, "loss": 0.0006, "lr": 7.912832527371426e-07, "epoch": 3.0513395803088295, "percentage": 61.03, "elapsed_time": "0:34:13", "remaining_time": "0:21:51", "throughput": 5545.1, "total_tokens": 11384320} +{"current_steps": 23125, "total_steps": 37885, "loss": 0.0, "lr": 7.908327231310454e-07, "epoch": 3.0519994720865777, "percentage": 61.04, "elapsed_time": "0:34:13", "remaining_time": "0:21:50", "throughput": 5545.39, "total_tokens": 11386752} +{"current_steps": 23130, "total_steps": 37885, "loss": 0.0001, "lr": 7.903822379191885e-07, "epoch": 3.0526593638643265, "percentage": 61.05, "elapsed_time": "0:34:13", "remaining_time": "0:21:50", "throughput": 5545.63, "total_tokens": 11389120} +{"current_steps": 23135, "total_steps": 37885, "loss": 0.0002, "lr": 7.899317971971835e-07, "epoch": 3.0533192556420747, "percentage": 61.07, "elapsed_time": "0:34:14", "remaining_time": "0:21:49", "throughput": 5545.98, "total_tokens": 11391680} +{"current_steps": 23140, "total_steps": 37885, "loss": 0.0, "lr": 7.894814010606336e-07, "epoch": 3.053979147419823, "percentage": 61.08, "elapsed_time": "0:34:14", "remaining_time": "0:21:49", "throughput": 5546.3, "total_tokens": 11394176} +{"current_steps": 23145, "total_steps": 37885, "loss": 0.0, "lr": 7.890310496051319e-07, "epoch": 3.0546390391975717, "percentage": 61.09, "elapsed_time": "0:34:14", "remaining_time": "0:21:48", "throughput": 5546.51, "total_tokens": 11396480} +{"current_steps": 23150, "total_steps": 37885, "loss": 0.0, "lr": 7.885807429262616e-07, "epoch": 3.05529893097532, "percentage": 61.11, "elapsed_time": "0:34:15", "remaining_time": "0:21:48", "throughput": 5546.88, "total_tokens": 11399104} +{"current_steps": 23155, "total_steps": 37885, "loss": 0.0007, "lr": 7.881304811195985e-07, "epoch": 3.0559588227530683, "percentage": 61.12, "elapsed_time": "0:34:15", "remaining_time": "0:21:47", "throughput": 5547.22, "total_tokens": 11401664} +{"current_steps": 23160, "total_steps": 37885, "loss": 0.0, "lr": 7.876802642807056e-07, "epoch": 3.056618714530817, "percentage": 61.13, "elapsed_time": "0:34:15", "remaining_time": "0:21:47", "throughput": 5547.45, "total_tokens": 11403968} +{"current_steps": 23165, "total_steps": 37885, "loss": 0.0, "lr": 7.8723009250514e-07, "epoch": 3.0572786063085653, "percentage": 61.15, "elapsed_time": "0:34:16", "remaining_time": "0:21:46", "throughput": 5547.87, "total_tokens": 11406720} +{"current_steps": 23170, "total_steps": 37885, "loss": 0.0054, "lr": 7.867799658884462e-07, "epoch": 3.057938498086314, "percentage": 61.16, "elapsed_time": "0:34:16", "remaining_time": "0:21:45", "throughput": 5548.3, "total_tokens": 11409472} +{"current_steps": 23175, "total_steps": 37885, "loss": 0.0196, "lr": 7.863298845261603e-07, "epoch": 3.0585983898640623, "percentage": 61.17, "elapsed_time": "0:34:16", "remaining_time": "0:21:45", "throughput": 5548.69, "total_tokens": 11412160} +{"current_steps": 23180, "total_steps": 37885, "loss": 0.0, "lr": 7.858798485138095e-07, "epoch": 3.0592582816418106, "percentage": 61.19, "elapsed_time": "0:34:17", "remaining_time": "0:21:44", "throughput": 5548.94, "total_tokens": 11414528} +{"current_steps": 23185, "total_steps": 37885, "loss": 0.0, "lr": 7.854298579469099e-07, "epoch": 3.0599181734195593, "percentage": 61.2, "elapsed_time": "0:34:17", "remaining_time": "0:21:44", "throughput": 5549.22, "total_tokens": 11416960} +{"current_steps": 23190, "total_steps": 37885, "loss": 0.0002, "lr": 7.849799129209697e-07, "epoch": 3.0605780651973076, "percentage": 61.21, "elapsed_time": "0:34:17", "remaining_time": "0:21:43", "throughput": 5549.58, "total_tokens": 11419584} +{"current_steps": 23195, "total_steps": 37885, "loss": 0.0, "lr": 7.845300135314857e-07, "epoch": 3.0612379569750563, "percentage": 61.22, "elapsed_time": "0:34:18", "remaining_time": "0:21:43", "throughput": 5549.87, "total_tokens": 11422016} +{"current_steps": 23200, "total_steps": 37885, "loss": 0.0002, "lr": 7.840801598739459e-07, "epoch": 3.0618978487528046, "percentage": 61.24, "elapsed_time": "0:34:18", "remaining_time": "0:21:42", "throughput": 5550.18, "total_tokens": 11424512} +{"current_steps": 23205, "total_steps": 37885, "loss": 0.0374, "lr": 7.836303520438288e-07, "epoch": 3.062557740530553, "percentage": 61.25, "elapsed_time": "0:34:18", "remaining_time": "0:21:42", "throughput": 5550.47, "total_tokens": 11426944} +{"current_steps": 23210, "total_steps": 37885, "loss": 0.0549, "lr": 7.831805901366025e-07, "epoch": 3.0632176323083016, "percentage": 61.26, "elapsed_time": "0:34:19", "remaining_time": "0:21:41", "throughput": 5550.69, "total_tokens": 11429248} +{"current_steps": 23215, "total_steps": 37885, "loss": 0.0, "lr": 7.827308742477259e-07, "epoch": 3.06387752408605, "percentage": 61.28, "elapsed_time": "0:34:19", "remaining_time": "0:21:41", "throughput": 5551.07, "total_tokens": 11431872} +{"current_steps": 23220, "total_steps": 37885, "loss": 0.0, "lr": 7.822812044726479e-07, "epoch": 3.064537415863798, "percentage": 61.29, "elapsed_time": "0:34:19", "remaining_time": "0:21:40", "throughput": 5551.39, "total_tokens": 11434368} +{"current_steps": 23225, "total_steps": 37885, "loss": 0.1095, "lr": 7.818315809068076e-07, "epoch": 3.065197307641547, "percentage": 61.3, "elapsed_time": "0:34:20", "remaining_time": "0:21:40", "throughput": 5551.66, "total_tokens": 11436800} +{"current_steps": 23230, "total_steps": 37885, "loss": 0.0, "lr": 7.813820036456344e-07, "epoch": 3.065857199419295, "percentage": 61.32, "elapsed_time": "0:34:20", "remaining_time": "0:21:39", "throughput": 5551.99, "total_tokens": 11439360} +{"current_steps": 23235, "total_steps": 37885, "loss": 0.0001, "lr": 7.809324727845478e-07, "epoch": 3.066517091197044, "percentage": 61.33, "elapsed_time": "0:34:20", "remaining_time": "0:21:39", "throughput": 5552.24, "total_tokens": 11441728} +{"current_steps": 23240, "total_steps": 37885, "loss": 0.0, "lr": 7.804829884189576e-07, "epoch": 3.067176982974792, "percentage": 61.34, "elapsed_time": "0:34:21", "remaining_time": "0:21:38", "throughput": 5552.67, "total_tokens": 11444480} +{"current_steps": 23245, "total_steps": 37885, "loss": 0.0, "lr": 7.800335506442635e-07, "epoch": 3.0678368747525404, "percentage": 61.36, "elapsed_time": "0:34:21", "remaining_time": "0:21:38", "throughput": 5553.07, "total_tokens": 11447168} +{"current_steps": 23250, "total_steps": 37885, "loss": 0.0, "lr": 7.795841595558554e-07, "epoch": 3.068496766530289, "percentage": 61.37, "elapsed_time": "0:34:21", "remaining_time": "0:21:37", "throughput": 5553.47, "total_tokens": 11449856} +{"current_steps": 23255, "total_steps": 37885, "loss": 0.0007, "lr": 7.791348152491133e-07, "epoch": 3.0691566583080374, "percentage": 61.38, "elapsed_time": "0:34:22", "remaining_time": "0:21:37", "throughput": 5553.72, "total_tokens": 11452224} +{"current_steps": 23260, "total_steps": 37885, "loss": 0.0, "lr": 7.78685517819407e-07, "epoch": 3.069816550085786, "percentage": 61.4, "elapsed_time": "0:34:22", "remaining_time": "0:21:36", "throughput": 5554.09, "total_tokens": 11454848} +{"current_steps": 23265, "total_steps": 37885, "loss": 0.0, "lr": 7.782362673620972e-07, "epoch": 3.0704764418635344, "percentage": 61.41, "elapsed_time": "0:34:22", "remaining_time": "0:21:36", "throughput": 5554.26, "total_tokens": 11457088} +{"current_steps": 23270, "total_steps": 37885, "loss": 0.0001, "lr": 7.777870639725339e-07, "epoch": 3.0711363336412827, "percentage": 61.42, "elapsed_time": "0:34:23", "remaining_time": "0:21:35", "throughput": 5554.54, "total_tokens": 11459520} +{"current_steps": 23275, "total_steps": 37885, "loss": 0.0001, "lr": 7.773379077460569e-07, "epoch": 3.0717962254190314, "percentage": 61.44, "elapsed_time": "0:34:23", "remaining_time": "0:21:35", "throughput": 5554.8, "total_tokens": 11461952} +{"current_steps": 23280, "total_steps": 37885, "loss": 0.0003, "lr": 7.768887987779966e-07, "epoch": 3.0724561171967797, "percentage": 61.45, "elapsed_time": "0:34:23", "remaining_time": "0:21:34", "throughput": 5555.13, "total_tokens": 11464512} +{"current_steps": 23285, "total_steps": 37885, "loss": 0.0, "lr": 7.764397371636731e-07, "epoch": 3.073116008974528, "percentage": 61.46, "elapsed_time": "0:34:24", "remaining_time": "0:21:34", "throughput": 5555.44, "total_tokens": 11467008} +{"current_steps": 23290, "total_steps": 37885, "loss": 0.0, "lr": 7.759907229983967e-07, "epoch": 3.0737759007522767, "percentage": 61.48, "elapsed_time": "0:34:24", "remaining_time": "0:21:33", "throughput": 5555.56, "total_tokens": 11469120} +{"current_steps": 23295, "total_steps": 37885, "loss": 0.0, "lr": 7.755417563774673e-07, "epoch": 3.074435792530025, "percentage": 61.49, "elapsed_time": "0:34:24", "remaining_time": "0:21:33", "throughput": 5555.93, "total_tokens": 11471744} +{"current_steps": 23300, "total_steps": 37885, "loss": 0.0756, "lr": 7.75092837396174e-07, "epoch": 3.0750956843077737, "percentage": 61.5, "elapsed_time": "0:34:25", "remaining_time": "0:21:32", "throughput": 5556.2, "total_tokens": 11474112} +{"current_steps": 23305, "total_steps": 37885, "loss": 0.0, "lr": 7.746439661497981e-07, "epoch": 3.075755576085522, "percentage": 61.52, "elapsed_time": "0:34:25", "remaining_time": "0:21:32", "throughput": 5556.56, "total_tokens": 11476736} +{"current_steps": 23310, "total_steps": 37885, "loss": 0.0813, "lr": 7.741951427336078e-07, "epoch": 3.0764154678632702, "percentage": 61.53, "elapsed_time": "0:34:25", "remaining_time": "0:21:31", "throughput": 5556.83, "total_tokens": 11479168} +{"current_steps": 23315, "total_steps": 37885, "loss": 0.0, "lr": 7.737463672428638e-07, "epoch": 3.077075359641019, "percentage": 61.54, "elapsed_time": "0:34:26", "remaining_time": "0:21:31", "throughput": 5557.14, "total_tokens": 11481664} +{"current_steps": 23320, "total_steps": 37885, "loss": 0.0005, "lr": 7.732976397728151e-07, "epoch": 3.0777352514187672, "percentage": 61.55, "elapsed_time": "0:34:26", "remaining_time": "0:21:30", "throughput": 5557.44, "total_tokens": 11484160} +{"current_steps": 23325, "total_steps": 37885, "loss": 0.0001, "lr": 7.728489604187001e-07, "epoch": 3.078395143196516, "percentage": 61.57, "elapsed_time": "0:34:26", "remaining_time": "0:21:30", "throughput": 5557.72, "total_tokens": 11486592} +{"current_steps": 23330, "total_steps": 37885, "loss": 0.0, "lr": 7.72400329275749e-07, "epoch": 3.0790550349742642, "percentage": 61.58, "elapsed_time": "0:34:27", "remaining_time": "0:21:29", "throughput": 5558.03, "total_tokens": 11489088} +{"current_steps": 23335, "total_steps": 37885, "loss": 0.0252, "lr": 7.719517464391791e-07, "epoch": 3.0797149267520125, "percentage": 61.59, "elapsed_time": "0:34:27", "remaining_time": "0:21:29", "throughput": 5558.27, "total_tokens": 11491392} +{"current_steps": 23340, "total_steps": 37885, "loss": 0.0016, "lr": 7.715032120042004e-07, "epoch": 3.0803748185297612, "percentage": 61.61, "elapsed_time": "0:34:27", "remaining_time": "0:21:28", "throughput": 5558.54, "total_tokens": 11493760} +{"current_steps": 23345, "total_steps": 37885, "loss": 0.0001, "lr": 7.710547260660096e-07, "epoch": 3.0810347103075095, "percentage": 61.62, "elapsed_time": "0:34:28", "remaining_time": "0:21:28", "throughput": 5558.89, "total_tokens": 11496320} +{"current_steps": 23350, "total_steps": 37885, "loss": 0.0, "lr": 7.706062887197959e-07, "epoch": 3.081694602085258, "percentage": 61.63, "elapsed_time": "0:34:28", "remaining_time": "0:21:27", "throughput": 5559.16, "total_tokens": 11498688} +{"current_steps": 23355, "total_steps": 37885, "loss": 0.0, "lr": 7.701579000607362e-07, "epoch": 3.0823544938630065, "percentage": 61.65, "elapsed_time": "0:34:28", "remaining_time": "0:21:27", "throughput": 5559.51, "total_tokens": 11501248} +{"current_steps": 23360, "total_steps": 37885, "loss": 0.0, "lr": 7.697095601839975e-07, "epoch": 3.083014385640755, "percentage": 61.66, "elapsed_time": "0:34:29", "remaining_time": "0:21:26", "throughput": 5559.81, "total_tokens": 11503680} +{"current_steps": 23365, "total_steps": 37885, "loss": 0.0, "lr": 7.692612691847373e-07, "epoch": 3.0836742774185035, "percentage": 61.67, "elapsed_time": "0:34:29", "remaining_time": "0:21:26", "throughput": 5560.13, "total_tokens": 11506176} +{"current_steps": 23370, "total_steps": 37885, "loss": 0.0673, "lr": 7.688130271581015e-07, "epoch": 3.084334169196252, "percentage": 61.69, "elapsed_time": "0:34:29", "remaining_time": "0:21:25", "throughput": 5560.62, "total_tokens": 11509056} +{"current_steps": 23375, "total_steps": 37885, "loss": 0.0, "lr": 7.68364834199227e-07, "epoch": 3.084994060974, "percentage": 61.7, "elapsed_time": "0:34:30", "remaining_time": "0:21:24", "throughput": 5560.82, "total_tokens": 11511296} +{"current_steps": 23380, "total_steps": 37885, "loss": 0.0, "lr": 7.679166904032389e-07, "epoch": 3.085653952751749, "percentage": 61.71, "elapsed_time": "0:34:30", "remaining_time": "0:21:24", "throughput": 5561.16, "total_tokens": 11513856} +{"current_steps": 23385, "total_steps": 37885, "loss": 0.0002, "lr": 7.674685958652525e-07, "epoch": 3.086313844529497, "percentage": 61.73, "elapsed_time": "0:34:30", "remaining_time": "0:21:23", "throughput": 5561.4, "total_tokens": 11516160} +{"current_steps": 23390, "total_steps": 37885, "loss": 0.0252, "lr": 7.67020550680373e-07, "epoch": 3.086973736307246, "percentage": 61.74, "elapsed_time": "0:34:31", "remaining_time": "0:21:23", "throughput": 5561.61, "total_tokens": 11518400} +{"current_steps": 23395, "total_steps": 37885, "loss": 0.1016, "lr": 7.665725549436942e-07, "epoch": 3.087633628084994, "percentage": 61.75, "elapsed_time": "0:34:31", "remaining_time": "0:21:22", "throughput": 5562.04, "total_tokens": 11521152} +{"current_steps": 23400, "total_steps": 37885, "loss": 0.0427, "lr": 7.661246087503006e-07, "epoch": 3.0882935198627424, "percentage": 61.77, "elapsed_time": "0:34:31", "remaining_time": "0:21:22", "throughput": 5562.42, "total_tokens": 11523776} +{"current_steps": 23405, "total_steps": 37885, "loss": 0.0001, "lr": 7.656767121952651e-07, "epoch": 3.088953411640491, "percentage": 61.78, "elapsed_time": "0:34:32", "remaining_time": "0:21:21", "throughput": 5562.71, "total_tokens": 11526208} +{"current_steps": 23410, "total_steps": 37885, "loss": 0.0002, "lr": 7.652288653736504e-07, "epoch": 3.0896133034182394, "percentage": 61.79, "elapsed_time": "0:34:32", "remaining_time": "0:21:21", "throughput": 5563.02, "total_tokens": 11528704} +{"current_steps": 23415, "total_steps": 37885, "loss": 0.0, "lr": 7.647810683805091e-07, "epoch": 3.0902731951959876, "percentage": 61.81, "elapsed_time": "0:34:32", "remaining_time": "0:21:20", "throughput": 5563.45, "total_tokens": 11531456} +{"current_steps": 23420, "total_steps": 37885, "loss": 0.117, "lr": 7.643333213108827e-07, "epoch": 3.0909330869737364, "percentage": 61.82, "elapsed_time": "0:34:33", "remaining_time": "0:21:20", "throughput": 5563.73, "total_tokens": 11533824} +{"current_steps": 23425, "total_steps": 37885, "loss": 0.0, "lr": 7.638856242598024e-07, "epoch": 3.0915929787514846, "percentage": 61.83, "elapsed_time": "0:34:33", "remaining_time": "0:21:19", "throughput": 5564.08, "total_tokens": 11536384} +{"current_steps": 23430, "total_steps": 37885, "loss": 0.002, "lr": 7.634379773222885e-07, "epoch": 3.0922528705292334, "percentage": 61.85, "elapsed_time": "0:34:33", "remaining_time": "0:21:19", "throughput": 5564.43, "total_tokens": 11538944} +{"current_steps": 23435, "total_steps": 37885, "loss": 0.0, "lr": 7.629903805933506e-07, "epoch": 3.0929127623069816, "percentage": 61.86, "elapsed_time": "0:34:34", "remaining_time": "0:21:18", "throughput": 5564.72, "total_tokens": 11541376} +{"current_steps": 23440, "total_steps": 37885, "loss": 0.0, "lr": 7.625428341679885e-07, "epoch": 3.09357265408473, "percentage": 61.87, "elapsed_time": "0:34:34", "remaining_time": "0:21:18", "throughput": 5565.04, "total_tokens": 11543872} +{"current_steps": 23445, "total_steps": 37885, "loss": 0.0456, "lr": 7.6209533814119e-07, "epoch": 3.0942325458624786, "percentage": 61.88, "elapsed_time": "0:34:34", "remaining_time": "0:21:17", "throughput": 5565.34, "total_tokens": 11546368} +{"current_steps": 23450, "total_steps": 37885, "loss": 0.0, "lr": 7.616478926079335e-07, "epoch": 3.094892437640227, "percentage": 61.9, "elapsed_time": "0:34:35", "remaining_time": "0:21:17", "throughput": 5565.7, "total_tokens": 11548928} +{"current_steps": 23455, "total_steps": 37885, "loss": 0.0, "lr": 7.612004976631857e-07, "epoch": 3.0955523294179756, "percentage": 61.91, "elapsed_time": "0:34:35", "remaining_time": "0:21:16", "throughput": 5566.12, "total_tokens": 11551680} +{"current_steps": 23460, "total_steps": 37885, "loss": 0.0, "lr": 7.607531534019028e-07, "epoch": 3.096212221195724, "percentage": 61.92, "elapsed_time": "0:34:35", "remaining_time": "0:21:16", "throughput": 5566.39, "total_tokens": 11554048} +{"current_steps": 23465, "total_steps": 37885, "loss": 0.0002, "lr": 7.60305859919031e-07, "epoch": 3.096872112973472, "percentage": 61.94, "elapsed_time": "0:34:36", "remaining_time": "0:21:15", "throughput": 5566.65, "total_tokens": 11556416} +{"current_steps": 23470, "total_steps": 37885, "loss": 0.0, "lr": 7.598586173095043e-07, "epoch": 3.097532004751221, "percentage": 61.95, "elapsed_time": "0:34:36", "remaining_time": "0:21:15", "throughput": 5566.96, "total_tokens": 11558912} +{"current_steps": 23475, "total_steps": 37885, "loss": 0.0, "lr": 7.594114256682473e-07, "epoch": 3.098191896528969, "percentage": 61.96, "elapsed_time": "0:34:36", "remaining_time": "0:21:14", "throughput": 5567.3, "total_tokens": 11561472} +{"current_steps": 23480, "total_steps": 37885, "loss": 0.0719, "lr": 7.589642850901733e-07, "epoch": 3.0988517883067175, "percentage": 61.98, "elapsed_time": "0:34:37", "remaining_time": "0:21:14", "throughput": 5567.57, "total_tokens": 11563840} +{"current_steps": 23485, "total_steps": 37885, "loss": 0.001, "lr": 7.585171956701837e-07, "epoch": 3.099511680084466, "percentage": 61.99, "elapsed_time": "0:34:37", "remaining_time": "0:21:13", "throughput": 5567.97, "total_tokens": 11566528} +{"current_steps": 23490, "total_steps": 37885, "loss": 0.0, "lr": 7.580701575031713e-07, "epoch": 3.1001715718622145, "percentage": 62.0, "elapsed_time": "0:34:37", "remaining_time": "0:21:13", "throughput": 5568.12, "total_tokens": 11568640} +{"current_steps": 23495, "total_steps": 37885, "loss": 0.0009, "lr": 7.576231706840154e-07, "epoch": 3.100831463639963, "percentage": 62.02, "elapsed_time": "0:34:37", "remaining_time": "0:21:12", "throughput": 5568.44, "total_tokens": 11571136} +{"current_steps": 23500, "total_steps": 37885, "loss": 0.0, "lr": 7.571762353075869e-07, "epoch": 3.1014913554177115, "percentage": 62.03, "elapsed_time": "0:34:38", "remaining_time": "0:21:12", "throughput": 5568.73, "total_tokens": 11573568} +{"current_steps": 23505, "total_steps": 37885, "loss": 0.0, "lr": 7.56729351468744e-07, "epoch": 3.1021512471954598, "percentage": 62.04, "elapsed_time": "0:34:38", "remaining_time": "0:21:11", "throughput": 5569.05, "total_tokens": 11576064} +{"current_steps": 23510, "total_steps": 37885, "loss": 0.0722, "lr": 7.562825192623341e-07, "epoch": 3.1028111389732085, "percentage": 62.06, "elapsed_time": "0:34:38", "remaining_time": "0:21:11", "throughput": 5569.33, "total_tokens": 11578496} +{"current_steps": 23515, "total_steps": 37885, "loss": 0.0, "lr": 7.558357387831953e-07, "epoch": 3.1034710307509568, "percentage": 62.07, "elapsed_time": "0:34:39", "remaining_time": "0:21:10", "throughput": 5569.66, "total_tokens": 11580992} +{"current_steps": 23520, "total_steps": 37885, "loss": 0.0164, "lr": 7.553890101261522e-07, "epoch": 3.1041309225287055, "percentage": 62.08, "elapsed_time": "0:34:39", "remaining_time": "0:21:10", "throughput": 5569.98, "total_tokens": 11583488} +{"current_steps": 23525, "total_steps": 37885, "loss": 0.0, "lr": 7.54942333386021e-07, "epoch": 3.1047908143064538, "percentage": 62.1, "elapsed_time": "0:34:39", "remaining_time": "0:21:09", "throughput": 5570.41, "total_tokens": 11586240} +{"current_steps": 23530, "total_steps": 37885, "loss": 0.0, "lr": 7.544957086576049e-07, "epoch": 3.105450706084202, "percentage": 62.11, "elapsed_time": "0:34:40", "remaining_time": "0:21:09", "throughput": 5570.86, "total_tokens": 11589056} +{"current_steps": 23535, "total_steps": 37885, "loss": 0.0, "lr": 7.540491360356965e-07, "epoch": 3.1061105978619508, "percentage": 62.12, "elapsed_time": "0:34:40", "remaining_time": "0:21:08", "throughput": 5571.07, "total_tokens": 11591296} +{"current_steps": 23540, "total_steps": 37885, "loss": 0.0001, "lr": 7.53602615615078e-07, "epoch": 3.106770489639699, "percentage": 62.14, "elapsed_time": "0:34:40", "remaining_time": "0:21:08", "throughput": 5571.42, "total_tokens": 11593856} +{"current_steps": 23545, "total_steps": 37885, "loss": 0.0, "lr": 7.5315614749052e-07, "epoch": 3.1074303814174478, "percentage": 62.15, "elapsed_time": "0:34:41", "remaining_time": "0:21:07", "throughput": 5571.63, "total_tokens": 11596096} +{"current_steps": 23550, "total_steps": 37885, "loss": 0.0, "lr": 7.527097317567824e-07, "epoch": 3.108090273195196, "percentage": 62.16, "elapsed_time": "0:34:41", "remaining_time": "0:21:07", "throughput": 5571.94, "total_tokens": 11598592} +{"current_steps": 23555, "total_steps": 37885, "loss": 0.063, "lr": 7.522633685086135e-07, "epoch": 3.1087501649729443, "percentage": 62.18, "elapsed_time": "0:34:41", "remaining_time": "0:21:06", "throughput": 5572.25, "total_tokens": 11601088} +{"current_steps": 23560, "total_steps": 37885, "loss": 0.0026, "lr": 7.518170578407505e-07, "epoch": 3.109410056750693, "percentage": 62.19, "elapsed_time": "0:34:42", "remaining_time": "0:21:06", "throughput": 5572.64, "total_tokens": 11603712} +{"current_steps": 23565, "total_steps": 37885, "loss": 0.0003, "lr": 7.513707998479199e-07, "epoch": 3.1100699485284413, "percentage": 62.2, "elapsed_time": "0:34:42", "remaining_time": "0:21:05", "throughput": 5572.9, "total_tokens": 11606080} +{"current_steps": 23570, "total_steps": 37885, "loss": 0.0, "lr": 7.509245946248363e-07, "epoch": 3.1107298403061896, "percentage": 62.21, "elapsed_time": "0:34:42", "remaining_time": "0:21:05", "throughput": 5573.04, "total_tokens": 11608192} +{"current_steps": 23575, "total_steps": 37885, "loss": 0.0001, "lr": 7.504784422662042e-07, "epoch": 3.1113897320839383, "percentage": 62.23, "elapsed_time": "0:34:43", "remaining_time": "0:21:04", "throughput": 5573.27, "total_tokens": 11610496} +{"current_steps": 23580, "total_steps": 37885, "loss": 0.0004, "lr": 7.500323428667159e-07, "epoch": 3.1120496238616866, "percentage": 62.24, "elapsed_time": "0:34:43", "remaining_time": "0:21:04", "throughput": 5573.65, "total_tokens": 11613120} +{"current_steps": 23585, "total_steps": 37885, "loss": 0.0337, "lr": 7.495862965210525e-07, "epoch": 3.1127095156394353, "percentage": 62.25, "elapsed_time": "0:34:43", "remaining_time": "0:21:03", "throughput": 5573.81, "total_tokens": 11615296} +{"current_steps": 23590, "total_steps": 37885, "loss": 0.0008, "lr": 7.491403033238844e-07, "epoch": 3.1133694074171836, "percentage": 62.27, "elapsed_time": "0:34:44", "remaining_time": "0:21:02", "throughput": 5574.05, "total_tokens": 11617600} +{"current_steps": 23595, "total_steps": 37885, "loss": 0.0323, "lr": 7.4869436336987e-07, "epoch": 3.114029299194932, "percentage": 62.28, "elapsed_time": "0:34:44", "remaining_time": "0:21:02", "throughput": 5574.3, "total_tokens": 11619968} +{"current_steps": 23600, "total_steps": 37885, "loss": 0.0002, "lr": 7.482484767536576e-07, "epoch": 3.1146891909726806, "percentage": 62.29, "elapsed_time": "0:34:44", "remaining_time": "0:21:01", "throughput": 5574.56, "total_tokens": 11622336} +{"current_steps": 23605, "total_steps": 37885, "loss": 0.0001, "lr": 7.478026435698827e-07, "epoch": 3.115349082750429, "percentage": 62.31, "elapsed_time": "0:34:45", "remaining_time": "0:21:01", "throughput": 5574.91, "total_tokens": 11624896} +{"current_steps": 23610, "total_steps": 37885, "loss": 0.0005, "lr": 7.473568639131706e-07, "epoch": 3.116008974528177, "percentage": 62.32, "elapsed_time": "0:34:45", "remaining_time": "0:21:00", "throughput": 5575.16, "total_tokens": 11627264} +{"current_steps": 23615, "total_steps": 37885, "loss": 0.0001, "lr": 7.469111378781346e-07, "epoch": 3.116668866305926, "percentage": 62.33, "elapsed_time": "0:34:45", "remaining_time": "0:21:00", "throughput": 5575.59, "total_tokens": 11630016} +{"current_steps": 23620, "total_steps": 37885, "loss": 0.1031, "lr": 7.464654655593767e-07, "epoch": 3.117328758083674, "percentage": 62.35, "elapsed_time": "0:34:46", "remaining_time": "0:20:59", "throughput": 5575.89, "total_tokens": 11632448} +{"current_steps": 23625, "total_steps": 37885, "loss": 0.0, "lr": 7.46019847051488e-07, "epoch": 3.117988649861423, "percentage": 62.36, "elapsed_time": "0:34:46", "remaining_time": "0:20:59", "throughput": 5576.2, "total_tokens": 11634944} +{"current_steps": 23630, "total_steps": 37885, "loss": 0.0002, "lr": 7.455742824490477e-07, "epoch": 3.118648541639171, "percentage": 62.37, "elapsed_time": "0:34:46", "remaining_time": "0:20:58", "throughput": 5576.61, "total_tokens": 11637632} +{"current_steps": 23635, "total_steps": 37885, "loss": 0.0, "lr": 7.45128771846623e-07, "epoch": 3.1193084334169194, "percentage": 62.39, "elapsed_time": "0:34:47", "remaining_time": "0:20:58", "throughput": 5576.96, "total_tokens": 11640192} +{"current_steps": 23640, "total_steps": 37885, "loss": 0.0, "lr": 7.446833153387714e-07, "epoch": 3.119968325194668, "percentage": 62.4, "elapsed_time": "0:34:47", "remaining_time": "0:20:57", "throughput": 5577.35, "total_tokens": 11642880} +{"current_steps": 23645, "total_steps": 37885, "loss": 0.0, "lr": 7.442379130200369e-07, "epoch": 3.1206282169724164, "percentage": 62.41, "elapsed_time": "0:34:47", "remaining_time": "0:20:57", "throughput": 5577.58, "total_tokens": 11645184} +{"current_steps": 23650, "total_steps": 37885, "loss": 0.0, "lr": 7.437925649849534e-07, "epoch": 3.121288108750165, "percentage": 62.43, "elapsed_time": "0:34:48", "remaining_time": "0:20:56", "throughput": 5577.84, "total_tokens": 11647552} +{"current_steps": 23655, "total_steps": 37885, "loss": 0.0613, "lr": 7.433472713280426e-07, "epoch": 3.1219480005279134, "percentage": 62.44, "elapsed_time": "0:34:48", "remaining_time": "0:20:56", "throughput": 5578.23, "total_tokens": 11650240} +{"current_steps": 23660, "total_steps": 37885, "loss": 0.0004, "lr": 7.42902032143815e-07, "epoch": 3.1226078923056617, "percentage": 62.45, "elapsed_time": "0:34:48", "remaining_time": "0:20:55", "throughput": 5578.51, "total_tokens": 11652672} +{"current_steps": 23665, "total_steps": 37885, "loss": 0.0, "lr": 7.424568475267697e-07, "epoch": 3.1232677840834104, "percentage": 62.47, "elapsed_time": "0:34:49", "remaining_time": "0:20:55", "throughput": 5578.69, "total_tokens": 11654848} +{"current_steps": 23670, "total_steps": 37885, "loss": 0.0, "lr": 7.42011717571393e-07, "epoch": 3.1239276758611587, "percentage": 62.48, "elapsed_time": "0:34:49", "remaining_time": "0:20:54", "throughput": 5578.8, "total_tokens": 11656896} +{"current_steps": 23675, "total_steps": 37885, "loss": 0.0, "lr": 7.415666423721613e-07, "epoch": 3.1245875676389074, "percentage": 62.49, "elapsed_time": "0:34:49", "remaining_time": "0:20:54", "throughput": 5579.06, "total_tokens": 11659264} +{"current_steps": 23680, "total_steps": 37885, "loss": 0.0, "lr": 7.411216220235381e-07, "epoch": 3.1252474594166557, "percentage": 62.5, "elapsed_time": "0:34:50", "remaining_time": "0:20:53", "throughput": 5579.38, "total_tokens": 11661760} +{"current_steps": 23685, "total_steps": 37885, "loss": 0.0891, "lr": 7.406766566199762e-07, "epoch": 3.125907351194404, "percentage": 62.52, "elapsed_time": "0:34:50", "remaining_time": "0:20:53", "throughput": 5579.57, "total_tokens": 11664000} +{"current_steps": 23690, "total_steps": 37885, "loss": 0.0001, "lr": 7.402317462559163e-07, "epoch": 3.1265672429721527, "percentage": 62.53, "elapsed_time": "0:34:50", "remaining_time": "0:20:52", "throughput": 5579.94, "total_tokens": 11666624} +{"current_steps": 23695, "total_steps": 37885, "loss": 0.0, "lr": 7.397868910257865e-07, "epoch": 3.127227134749901, "percentage": 62.54, "elapsed_time": "0:34:51", "remaining_time": "0:20:52", "throughput": 5580.37, "total_tokens": 11669376} +{"current_steps": 23700, "total_steps": 37885, "loss": 0.0564, "lr": 7.393420910240054e-07, "epoch": 3.1278870265276493, "percentage": 62.56, "elapsed_time": "0:34:51", "remaining_time": "0:20:51", "throughput": 5580.79, "total_tokens": 11672128} +{"current_steps": 23705, "total_steps": 37885, "loss": 0.0, "lr": 7.388973463449773e-07, "epoch": 3.128546918305398, "percentage": 62.57, "elapsed_time": "0:34:51", "remaining_time": "0:20:51", "throughput": 5581.05, "total_tokens": 11674496} +{"current_steps": 23710, "total_steps": 37885, "loss": 0.0, "lr": 7.384526570830972e-07, "epoch": 3.1292068100831463, "percentage": 62.58, "elapsed_time": "0:34:52", "remaining_time": "0:20:50", "throughput": 5581.37, "total_tokens": 11676992} +{"current_steps": 23715, "total_steps": 37885, "loss": 0.0004, "lr": 7.380080233327466e-07, "epoch": 3.129866701860895, "percentage": 62.6, "elapsed_time": "0:34:52", "remaining_time": "0:20:50", "throughput": 5581.79, "total_tokens": 11679744} +{"current_steps": 23720, "total_steps": 37885, "loss": 0.0087, "lr": 7.375634451882956e-07, "epoch": 3.1305265936386433, "percentage": 62.61, "elapsed_time": "0:34:52", "remaining_time": "0:20:49", "throughput": 5582.02, "total_tokens": 11682048} +{"current_steps": 23725, "total_steps": 37885, "loss": 0.0213, "lr": 7.371189227441031e-07, "epoch": 3.1311864854163916, "percentage": 62.62, "elapsed_time": "0:34:53", "remaining_time": "0:20:49", "throughput": 5582.36, "total_tokens": 11684608} +{"current_steps": 23730, "total_steps": 37885, "loss": 0.0552, "lr": 7.366744560945155e-07, "epoch": 3.1318463771941403, "percentage": 62.64, "elapsed_time": "0:34:53", "remaining_time": "0:20:48", "throughput": 5582.62, "total_tokens": 11686976} +{"current_steps": 23735, "total_steps": 37885, "loss": 0.0001, "lr": 7.362300453338679e-07, "epoch": 3.1325062689718886, "percentage": 62.65, "elapsed_time": "0:34:53", "remaining_time": "0:20:48", "throughput": 5582.85, "total_tokens": 11689280} +{"current_steps": 23740, "total_steps": 37885, "loss": 0.0, "lr": 7.357856905564832e-07, "epoch": 3.133166160749637, "percentage": 62.66, "elapsed_time": "0:34:54", "remaining_time": "0:20:47", "throughput": 5583.17, "total_tokens": 11691776} +{"current_steps": 23745, "total_steps": 37885, "loss": 0.0, "lr": 7.353413918566721e-07, "epoch": 3.1338260525273856, "percentage": 62.68, "elapsed_time": "0:34:54", "remaining_time": "0:20:47", "throughput": 5583.4, "total_tokens": 11694080} +{"current_steps": 23750, "total_steps": 37885, "loss": 0.0, "lr": 7.348971493287342e-07, "epoch": 3.134485944305134, "percentage": 62.69, "elapsed_time": "0:34:54", "remaining_time": "0:20:46", "throughput": 5583.74, "total_tokens": 11696640} +{"current_steps": 23755, "total_steps": 37885, "loss": 0.0239, "lr": 7.344529630669565e-07, "epoch": 3.1351458360828826, "percentage": 62.7, "elapsed_time": "0:34:55", "remaining_time": "0:20:46", "throughput": 5584.04, "total_tokens": 11699136} +{"current_steps": 23760, "total_steps": 37885, "loss": 0.0005, "lr": 7.340088331656147e-07, "epoch": 3.135805727860631, "percentage": 62.72, "elapsed_time": "0:34:55", "remaining_time": "0:20:45", "throughput": 5584.36, "total_tokens": 11701632} +{"current_steps": 23765, "total_steps": 37885, "loss": 0.0322, "lr": 7.33564759718972e-07, "epoch": 3.136465619638379, "percentage": 62.73, "elapsed_time": "0:34:55", "remaining_time": "0:20:45", "throughput": 5584.62, "total_tokens": 11704000} +{"current_steps": 23770, "total_steps": 37885, "loss": 0.0472, "lr": 7.331207428212792e-07, "epoch": 3.137125511416128, "percentage": 62.74, "elapsed_time": "0:34:56", "remaining_time": "0:20:44", "throughput": 5584.98, "total_tokens": 11706624} +{"current_steps": 23775, "total_steps": 37885, "loss": 0.0004, "lr": 7.326767825667766e-07, "epoch": 3.137785403193876, "percentage": 62.76, "elapsed_time": "0:34:56", "remaining_time": "0:20:44", "throughput": 5585.18, "total_tokens": 11708864} +{"current_steps": 23780, "total_steps": 37885, "loss": 0.0215, "lr": 7.322328790496908e-07, "epoch": 3.138445294971625, "percentage": 62.77, "elapsed_time": "0:34:56", "remaining_time": "0:20:43", "throughput": 5585.46, "total_tokens": 11711296} +{"current_steps": 23785, "total_steps": 37885, "loss": 0.0001, "lr": 7.317890323642375e-07, "epoch": 3.139105186749373, "percentage": 62.78, "elapsed_time": "0:34:57", "remaining_time": "0:20:43", "throughput": 5585.69, "total_tokens": 11713600} +{"current_steps": 23790, "total_steps": 37885, "loss": 0.0, "lr": 7.3134524260462e-07, "epoch": 3.1397650785271214, "percentage": 62.8, "elapsed_time": "0:34:57", "remaining_time": "0:20:42", "throughput": 5585.98, "total_tokens": 11716032} +{"current_steps": 23795, "total_steps": 37885, "loss": 0.0, "lr": 7.30901509865029e-07, "epoch": 3.14042497030487, "percentage": 62.81, "elapsed_time": "0:34:57", "remaining_time": "0:20:42", "throughput": 5586.32, "total_tokens": 11718592} +{"current_steps": 23800, "total_steps": 37885, "loss": 0.0001, "lr": 7.304578342396441e-07, "epoch": 3.1410848620826184, "percentage": 62.82, "elapsed_time": "0:34:58", "remaining_time": "0:20:41", "throughput": 5586.72, "total_tokens": 11721280} +{"current_steps": 23805, "total_steps": 37885, "loss": 0.0014, "lr": 7.300142158226319e-07, "epoch": 3.141744753860367, "percentage": 62.83, "elapsed_time": "0:34:58", "remaining_time": "0:20:41", "throughput": 5587.09, "total_tokens": 11723904} +{"current_steps": 23810, "total_steps": 37885, "loss": 0.0, "lr": 7.295706547081475e-07, "epoch": 3.1424046456381154, "percentage": 62.85, "elapsed_time": "0:34:58", "remaining_time": "0:20:40", "throughput": 5587.36, "total_tokens": 11726336} +{"current_steps": 23815, "total_steps": 37885, "loss": 0.0, "lr": 7.291271509903334e-07, "epoch": 3.1430645374158637, "percentage": 62.86, "elapsed_time": "0:34:59", "remaining_time": "0:20:40", "throughput": 5587.59, "total_tokens": 11728640} +{"current_steps": 23820, "total_steps": 37885, "loss": 0.0001, "lr": 7.286837047633195e-07, "epoch": 3.1437244291936124, "percentage": 62.87, "elapsed_time": "0:34:59", "remaining_time": "0:20:39", "throughput": 5587.88, "total_tokens": 11731072} +{"current_steps": 23825, "total_steps": 37885, "loss": 0.0001, "lr": 7.282403161212251e-07, "epoch": 3.1443843209713607, "percentage": 62.89, "elapsed_time": "0:34:59", "remaining_time": "0:20:39", "throughput": 5588.2, "total_tokens": 11733568} +{"current_steps": 23830, "total_steps": 37885, "loss": 0.1047, "lr": 7.277969851581551e-07, "epoch": 3.145044212749109, "percentage": 62.9, "elapsed_time": "0:35:00", "remaining_time": "0:20:38", "throughput": 5588.51, "total_tokens": 11736064} +{"current_steps": 23835, "total_steps": 37885, "loss": 0.0001, "lr": 7.273537119682045e-07, "epoch": 3.1457041045268577, "percentage": 62.91, "elapsed_time": "0:35:00", "remaining_time": "0:20:38", "throughput": 5588.77, "total_tokens": 11738432} +{"current_steps": 23840, "total_steps": 37885, "loss": 0.0, "lr": 7.26910496645454e-07, "epoch": 3.146363996304606, "percentage": 62.93, "elapsed_time": "0:35:00", "remaining_time": "0:20:37", "throughput": 5589.16, "total_tokens": 11741120} +{"current_steps": 23845, "total_steps": 37885, "loss": 0.0, "lr": 7.264673392839726e-07, "epoch": 3.1470238880823547, "percentage": 62.94, "elapsed_time": "0:35:01", "remaining_time": "0:20:37", "throughput": 5589.33, "total_tokens": 11743296} +{"current_steps": 23850, "total_steps": 37885, "loss": 0.0411, "lr": 7.260242399778183e-07, "epoch": 3.147683779860103, "percentage": 62.95, "elapsed_time": "0:35:01", "remaining_time": "0:20:36", "throughput": 5589.64, "total_tokens": 11745792} +{"current_steps": 23855, "total_steps": 37885, "loss": 0.0252, "lr": 7.255811988210343e-07, "epoch": 3.1483436716378512, "percentage": 62.97, "elapsed_time": "0:35:01", "remaining_time": "0:20:36", "throughput": 5589.81, "total_tokens": 11747968} +{"current_steps": 23860, "total_steps": 37885, "loss": 0.0896, "lr": 7.251382159076544e-07, "epoch": 3.1490035634156, "percentage": 62.98, "elapsed_time": "0:35:02", "remaining_time": "0:20:35", "throughput": 5589.97, "total_tokens": 11750144} +{"current_steps": 23865, "total_steps": 37885, "loss": 0.1151, "lr": 7.246952913316977e-07, "epoch": 3.1496634551933482, "percentage": 62.99, "elapsed_time": "0:35:02", "remaining_time": "0:20:35", "throughput": 5590.31, "total_tokens": 11752704} +{"current_steps": 23870, "total_steps": 37885, "loss": 0.0766, "lr": 7.242524251871714e-07, "epoch": 3.1503233469710965, "percentage": 63.01, "elapsed_time": "0:35:02", "remaining_time": "0:20:34", "throughput": 5590.56, "total_tokens": 11755072} +{"current_steps": 23875, "total_steps": 37885, "loss": 0.0001, "lr": 7.238096175680714e-07, "epoch": 3.1509832387488452, "percentage": 63.02, "elapsed_time": "0:35:02", "remaining_time": "0:20:34", "throughput": 5590.84, "total_tokens": 11757504} +{"current_steps": 23880, "total_steps": 37885, "loss": 0.0004, "lr": 7.233668685683798e-07, "epoch": 3.1516431305265935, "percentage": 63.03, "elapsed_time": "0:35:03", "remaining_time": "0:20:33", "throughput": 5591.04, "total_tokens": 11759744} +{"current_steps": 23885, "total_steps": 37885, "loss": 0.0907, "lr": 7.229241782820673e-07, "epoch": 3.1523030223043422, "percentage": 63.05, "elapsed_time": "0:35:03", "remaining_time": "0:20:33", "throughput": 5591.33, "total_tokens": 11762176} +{"current_steps": 23890, "total_steps": 37885, "loss": 0.0, "lr": 7.224815468030916e-07, "epoch": 3.1529629140820905, "percentage": 63.06, "elapsed_time": "0:35:03", "remaining_time": "0:20:32", "throughput": 5591.63, "total_tokens": 11764672} +{"current_steps": 23895, "total_steps": 37885, "loss": 0.0, "lr": 7.220389742253978e-07, "epoch": 3.153622805859839, "percentage": 63.07, "elapsed_time": "0:35:04", "remaining_time": "0:20:32", "throughput": 5591.94, "total_tokens": 11767168} +{"current_steps": 23900, "total_steps": 37885, "loss": 0.0025, "lr": 7.21596460642919e-07, "epoch": 3.1542826976375875, "percentage": 63.09, "elapsed_time": "0:35:04", "remaining_time": "0:20:31", "throughput": 5592.24, "total_tokens": 11769664} +{"current_steps": 23905, "total_steps": 37885, "loss": 0.0, "lr": 7.211540061495751e-07, "epoch": 3.154942589415336, "percentage": 63.1, "elapsed_time": "0:35:04", "remaining_time": "0:20:31", "throughput": 5592.63, "total_tokens": 11772352} +{"current_steps": 23910, "total_steps": 37885, "loss": 0.0087, "lr": 7.207116108392746e-07, "epoch": 3.1556024811930845, "percentage": 63.11, "elapsed_time": "0:35:05", "remaining_time": "0:20:30", "throughput": 5592.86, "total_tokens": 11774656} +{"current_steps": 23915, "total_steps": 37885, "loss": 0.0001, "lr": 7.202692748059121e-07, "epoch": 3.156262372970833, "percentage": 63.13, "elapsed_time": "0:35:05", "remaining_time": "0:20:30", "throughput": 5593.09, "total_tokens": 11776960} +{"current_steps": 23920, "total_steps": 37885, "loss": 0.061, "lr": 7.1982699814337e-07, "epoch": 3.156922264748581, "percentage": 63.14, "elapsed_time": "0:35:05", "remaining_time": "0:20:29", "throughput": 5593.4, "total_tokens": 11779456} +{"current_steps": 23925, "total_steps": 37885, "loss": 0.0001, "lr": 7.193847809455192e-07, "epoch": 3.15758215652633, "percentage": 63.15, "elapsed_time": "0:35:06", "remaining_time": "0:20:28", "throughput": 5593.74, "total_tokens": 11782016} +{"current_steps": 23930, "total_steps": 37885, "loss": 0.0, "lr": 7.189426233062161e-07, "epoch": 3.158242048304078, "percentage": 63.16, "elapsed_time": "0:35:06", "remaining_time": "0:20:28", "throughput": 5594.02, "total_tokens": 11784448} +{"current_steps": 23935, "total_steps": 37885, "loss": 0.0, "lr": 7.185005253193064e-07, "epoch": 3.158901940081827, "percentage": 63.18, "elapsed_time": "0:35:06", "remaining_time": "0:20:27", "throughput": 5594.28, "total_tokens": 11786816} +{"current_steps": 23940, "total_steps": 37885, "loss": 0.0299, "lr": 7.180584870786217e-07, "epoch": 3.159561831859575, "percentage": 63.19, "elapsed_time": "0:35:07", "remaining_time": "0:20:27", "throughput": 5594.51, "total_tokens": 11789120} +{"current_steps": 23945, "total_steps": 37885, "loss": 0.0052, "lr": 7.17616508677981e-07, "epoch": 3.1602217236373233, "percentage": 63.2, "elapsed_time": "0:35:07", "remaining_time": "0:20:26", "throughput": 5594.93, "total_tokens": 11791872} +{"current_steps": 23950, "total_steps": 37885, "loss": 0.0961, "lr": 7.171745902111919e-07, "epoch": 3.160881615415072, "percentage": 63.22, "elapsed_time": "0:35:07", "remaining_time": "0:20:26", "throughput": 5595.33, "total_tokens": 11794560} +{"current_steps": 23955, "total_steps": 37885, "loss": 0.0, "lr": 7.167327317720479e-07, "epoch": 3.1615415071928203, "percentage": 63.23, "elapsed_time": "0:35:08", "remaining_time": "0:20:25", "throughput": 5595.67, "total_tokens": 11797120} +{"current_steps": 23960, "total_steps": 37885, "loss": 0.0001, "lr": 7.162909334543303e-07, "epoch": 3.1622013989705686, "percentage": 63.24, "elapsed_time": "0:35:08", "remaining_time": "0:20:25", "throughput": 5596.01, "total_tokens": 11799680} +{"current_steps": 23965, "total_steps": 37885, "loss": 0.0, "lr": 7.158491953518079e-07, "epoch": 3.1628612907483173, "percentage": 63.26, "elapsed_time": "0:35:08", "remaining_time": "0:20:24", "throughput": 5596.27, "total_tokens": 11802048} +{"current_steps": 23970, "total_steps": 37885, "loss": 0.0431, "lr": 7.154075175582355e-07, "epoch": 3.1635211825260656, "percentage": 63.27, "elapsed_time": "0:35:09", "remaining_time": "0:20:24", "throughput": 5596.57, "total_tokens": 11804544} +{"current_steps": 23975, "total_steps": 37885, "loss": 0.0, "lr": 7.149659001673572e-07, "epoch": 3.1641810743038143, "percentage": 63.28, "elapsed_time": "0:35:09", "remaining_time": "0:20:23", "throughput": 5596.85, "total_tokens": 11806976} +{"current_steps": 23980, "total_steps": 37885, "loss": 0.0021, "lr": 7.14524343272902e-07, "epoch": 3.1648409660815626, "percentage": 63.3, "elapsed_time": "0:35:09", "remaining_time": "0:20:23", "throughput": 5597.11, "total_tokens": 11809344} +{"current_steps": 23985, "total_steps": 37885, "loss": 0.0001, "lr": 7.14082846968588e-07, "epoch": 3.165500857859311, "percentage": 63.31, "elapsed_time": "0:35:10", "remaining_time": "0:20:22", "throughput": 5597.42, "total_tokens": 11811840} +{"current_steps": 23990, "total_steps": 37885, "loss": 0.0001, "lr": 7.136414113481191e-07, "epoch": 3.1661607496370596, "percentage": 63.32, "elapsed_time": "0:35:10", "remaining_time": "0:20:22", "throughput": 5597.67, "total_tokens": 11814208} +{"current_steps": 23995, "total_steps": 37885, "loss": 0.0, "lr": 7.132000365051873e-07, "epoch": 3.166820641414808, "percentage": 63.34, "elapsed_time": "0:35:10", "remaining_time": "0:20:21", "throughput": 5598.0, "total_tokens": 11816768} +{"current_steps": 24000, "total_steps": 37885, "loss": 0.0002, "lr": 7.127587225334712e-07, "epoch": 3.1674805331925566, "percentage": 63.35, "elapsed_time": "0:35:11", "remaining_time": "0:20:21", "throughput": 5598.4, "total_tokens": 11819456} +{"current_steps": 24005, "total_steps": 37885, "loss": 0.0001, "lr": 7.123174695266354e-07, "epoch": 3.168140424970305, "percentage": 63.36, "elapsed_time": "0:35:11", "remaining_time": "0:20:20", "throughput": 5598.62, "total_tokens": 11821760} +{"current_steps": 24010, "total_steps": 37885, "loss": 0.0396, "lr": 7.11876277578334e-07, "epoch": 3.168800316748053, "percentage": 63.38, "elapsed_time": "0:35:11", "remaining_time": "0:20:20", "throughput": 5598.9, "total_tokens": 11824192} +{"current_steps": 24015, "total_steps": 37885, "loss": 0.0, "lr": 7.114351467822058e-07, "epoch": 3.169460208525802, "percentage": 63.39, "elapsed_time": "0:35:12", "remaining_time": "0:20:19", "throughput": 5599.21, "total_tokens": 11826688} +{"current_steps": 24020, "total_steps": 37885, "loss": 0.0, "lr": 7.109940772318787e-07, "epoch": 3.17012010030355, "percentage": 63.4, "elapsed_time": "0:35:12", "remaining_time": "0:20:19", "throughput": 5599.37, "total_tokens": 11828864} +{"current_steps": 24025, "total_steps": 37885, "loss": 0.0001, "lr": 7.105530690209656e-07, "epoch": 3.1707799920812985, "percentage": 63.42, "elapsed_time": "0:35:12", "remaining_time": "0:20:18", "throughput": 5599.6, "total_tokens": 11831168} +{"current_steps": 24030, "total_steps": 37885, "loss": 0.0626, "lr": 7.101121222430675e-07, "epoch": 3.171439883859047, "percentage": 63.43, "elapsed_time": "0:35:13", "remaining_time": "0:20:18", "throughput": 5600.11, "total_tokens": 11834176} +{"current_steps": 24035, "total_steps": 37885, "loss": 0.0, "lr": 7.096712369917724e-07, "epoch": 3.1720997756367955, "percentage": 63.44, "elapsed_time": "0:35:13", "remaining_time": "0:20:17", "throughput": 5600.25, "total_tokens": 11836288} +{"current_steps": 24040, "total_steps": 37885, "loss": 0.0008, "lr": 7.092304133606544e-07, "epoch": 3.172759667414544, "percentage": 63.46, "elapsed_time": "0:35:13", "remaining_time": "0:20:17", "throughput": 5600.66, "total_tokens": 11839040} +{"current_steps": 24045, "total_steps": 37885, "loss": 0.0, "lr": 7.087896514432762e-07, "epoch": 3.1734195591922925, "percentage": 63.47, "elapsed_time": "0:35:14", "remaining_time": "0:20:16", "throughput": 5600.86, "total_tokens": 11841280} +{"current_steps": 24050, "total_steps": 37885, "loss": 0.0511, "lr": 7.083489513331855e-07, "epoch": 3.1740794509700407, "percentage": 63.48, "elapsed_time": "0:35:14", "remaining_time": "0:20:16", "throughput": 5601.21, "total_tokens": 11843904} +{"current_steps": 24055, "total_steps": 37885, "loss": 0.0128, "lr": 7.079083131239177e-07, "epoch": 3.1747393427477895, "percentage": 63.49, "elapsed_time": "0:35:14", "remaining_time": "0:20:15", "throughput": 5601.49, "total_tokens": 11846336} +{"current_steps": 24060, "total_steps": 37885, "loss": 0.0008, "lr": 7.074677369089955e-07, "epoch": 3.1753992345255377, "percentage": 63.51, "elapsed_time": "0:35:15", "remaining_time": "0:20:15", "throughput": 5601.69, "total_tokens": 11848576} +{"current_steps": 24065, "total_steps": 37885, "loss": 0.0009, "lr": 7.070272227819276e-07, "epoch": 3.1760591263032865, "percentage": 63.52, "elapsed_time": "0:35:15", "remaining_time": "0:20:14", "throughput": 5601.82, "total_tokens": 11850688} +{"current_steps": 24070, "total_steps": 37885, "loss": 0.0, "lr": 7.065867708362103e-07, "epoch": 3.1767190180810347, "percentage": 63.53, "elapsed_time": "0:35:15", "remaining_time": "0:20:14", "throughput": 5602.05, "total_tokens": 11852992} +{"current_steps": 24075, "total_steps": 37885, "loss": 0.0001, "lr": 7.061463811653261e-07, "epoch": 3.177378909858783, "percentage": 63.55, "elapsed_time": "0:35:16", "remaining_time": "0:20:13", "throughput": 5602.46, "total_tokens": 11855744} +{"current_steps": 24080, "total_steps": 37885, "loss": 0.0001, "lr": 7.057060538627445e-07, "epoch": 3.1780388016365317, "percentage": 63.56, "elapsed_time": "0:35:16", "remaining_time": "0:20:13", "throughput": 5602.71, "total_tokens": 11858112} +{"current_steps": 24085, "total_steps": 37885, "loss": 0.0366, "lr": 7.05265789021922e-07, "epoch": 3.17869869341428, "percentage": 63.57, "elapsed_time": "0:35:16", "remaining_time": "0:20:12", "throughput": 5602.79, "total_tokens": 11860096} +{"current_steps": 24090, "total_steps": 37885, "loss": 0.0, "lr": 7.048255867363014e-07, "epoch": 3.1793585851920287, "percentage": 63.59, "elapsed_time": "0:35:17", "remaining_time": "0:20:12", "throughput": 5603.16, "total_tokens": 11862720} +{"current_steps": 24095, "total_steps": 37885, "loss": 0.0682, "lr": 7.043854470993125e-07, "epoch": 3.180018476969777, "percentage": 63.6, "elapsed_time": "0:35:17", "remaining_time": "0:20:11", "throughput": 5603.41, "total_tokens": 11865088} +{"current_steps": 24100, "total_steps": 37885, "loss": 0.1339, "lr": 7.039453702043719e-07, "epoch": 3.1806783687475253, "percentage": 63.61, "elapsed_time": "0:35:17", "remaining_time": "0:20:11", "throughput": 5603.77, "total_tokens": 11867712} +{"current_steps": 24105, "total_steps": 37885, "loss": 0.0034, "lr": 7.035053561448825e-07, "epoch": 3.181338260525274, "percentage": 63.63, "elapsed_time": "0:35:18", "remaining_time": "0:20:10", "throughput": 5604.11, "total_tokens": 11870272} +{"current_steps": 24110, "total_steps": 37885, "loss": 0.0, "lr": 7.030654050142341e-07, "epoch": 3.1819981523030223, "percentage": 63.64, "elapsed_time": "0:35:18", "remaining_time": "0:20:10", "throughput": 5604.47, "total_tokens": 11872896} +{"current_steps": 24115, "total_steps": 37885, "loss": 0.0706, "lr": 7.026255169058035e-07, "epoch": 3.1826580440807706, "percentage": 63.65, "elapsed_time": "0:35:18", "remaining_time": "0:20:09", "throughput": 5604.78, "total_tokens": 11875392} +{"current_steps": 24120, "total_steps": 37885, "loss": 0.0114, "lr": 7.021856919129534e-07, "epoch": 3.1833179358585193, "percentage": 63.67, "elapsed_time": "0:35:19", "remaining_time": "0:20:09", "throughput": 5605.0, "total_tokens": 11877696} +{"current_steps": 24125, "total_steps": 37885, "loss": 0.0308, "lr": 7.017459301290337e-07, "epoch": 3.1839778276362676, "percentage": 63.68, "elapsed_time": "0:35:19", "remaining_time": "0:20:08", "throughput": 5605.39, "total_tokens": 11880384} +{"current_steps": 24130, "total_steps": 37885, "loss": 0.0813, "lr": 7.013062316473803e-07, "epoch": 3.1846377194140163, "percentage": 63.69, "elapsed_time": "0:35:19", "remaining_time": "0:20:08", "throughput": 5605.73, "total_tokens": 11882944} +{"current_steps": 24135, "total_steps": 37885, "loss": 0.0, "lr": 7.008665965613165e-07, "epoch": 3.1852976111917646, "percentage": 63.71, "elapsed_time": "0:35:20", "remaining_time": "0:20:07", "throughput": 5606.03, "total_tokens": 11885440} +{"current_steps": 24140, "total_steps": 37885, "loss": 0.0001, "lr": 7.004270249641513e-07, "epoch": 3.185957502969513, "percentage": 63.72, "elapsed_time": "0:35:20", "remaining_time": "0:20:07", "throughput": 5606.23, "total_tokens": 11887680} +{"current_steps": 24145, "total_steps": 37885, "loss": 0.0009, "lr": 6.999875169491808e-07, "epoch": 3.1866173947472616, "percentage": 63.73, "elapsed_time": "0:35:20", "remaining_time": "0:20:06", "throughput": 5606.45, "total_tokens": 11889984} +{"current_steps": 24150, "total_steps": 37885, "loss": 0.0, "lr": 6.995480726096875e-07, "epoch": 3.18727728652501, "percentage": 63.75, "elapsed_time": "0:35:21", "remaining_time": "0:20:06", "throughput": 5606.65, "total_tokens": 11892224} +{"current_steps": 24155, "total_steps": 37885, "loss": 0.0441, "lr": 6.991086920389395e-07, "epoch": 3.187937178302758, "percentage": 63.76, "elapsed_time": "0:35:21", "remaining_time": "0:20:05", "throughput": 5606.93, "total_tokens": 11894656} +{"current_steps": 24160, "total_steps": 37885, "loss": 0.1136, "lr": 6.986693753301934e-07, "epoch": 3.188597070080507, "percentage": 63.77, "elapsed_time": "0:35:21", "remaining_time": "0:20:05", "throughput": 5607.27, "total_tokens": 11897216} +{"current_steps": 24165, "total_steps": 37885, "loss": 0.0016, "lr": 6.982301225766897e-07, "epoch": 3.189256961858255, "percentage": 63.79, "elapsed_time": "0:35:22", "remaining_time": "0:20:04", "throughput": 5607.58, "total_tokens": 11899712} +{"current_steps": 24170, "total_steps": 37885, "loss": 0.0239, "lr": 6.977909338716578e-07, "epoch": 3.189916853636004, "percentage": 63.8, "elapsed_time": "0:35:22", "remaining_time": "0:20:04", "throughput": 5607.86, "total_tokens": 11902144} +{"current_steps": 24175, "total_steps": 37885, "loss": 0.099, "lr": 6.973518093083116e-07, "epoch": 3.190576745413752, "percentage": 63.81, "elapsed_time": "0:35:22", "remaining_time": "0:20:03", "throughput": 5608.16, "total_tokens": 11904640} +{"current_steps": 24180, "total_steps": 37885, "loss": 0.0008, "lr": 6.969127489798519e-07, "epoch": 3.1912366371915004, "percentage": 63.82, "elapsed_time": "0:35:23", "remaining_time": "0:20:03", "throughput": 5608.46, "total_tokens": 11907136} +{"current_steps": 24185, "total_steps": 37885, "loss": 0.0013, "lr": 6.964737529794669e-07, "epoch": 3.191896528969249, "percentage": 63.84, "elapsed_time": "0:35:23", "remaining_time": "0:20:02", "throughput": 5608.79, "total_tokens": 11909696} +{"current_steps": 24190, "total_steps": 37885, "loss": 0.0, "lr": 6.960348214003294e-07, "epoch": 3.1925564207469974, "percentage": 63.85, "elapsed_time": "0:35:23", "remaining_time": "0:20:02", "throughput": 5609.04, "total_tokens": 11912064} +{"current_steps": 24195, "total_steps": 37885, "loss": 0.0, "lr": 6.955959543356005e-07, "epoch": 3.193216312524746, "percentage": 63.86, "elapsed_time": "0:35:24", "remaining_time": "0:20:01", "throughput": 5609.26, "total_tokens": 11914368} +{"current_steps": 24200, "total_steps": 37885, "loss": 0.0001, "lr": 6.951571518784257e-07, "epoch": 3.1938762043024944, "percentage": 63.88, "elapsed_time": "0:35:24", "remaining_time": "0:20:01", "throughput": 5609.51, "total_tokens": 11916736} +{"current_steps": 24205, "total_steps": 37885, "loss": 0.0, "lr": 6.947184141219378e-07, "epoch": 3.1945360960802427, "percentage": 63.89, "elapsed_time": "0:35:24", "remaining_time": "0:20:00", "throughput": 5609.68, "total_tokens": 11918912} +{"current_steps": 24210, "total_steps": 37885, "loss": 0.0, "lr": 6.94279741159256e-07, "epoch": 3.1951959878579914, "percentage": 63.9, "elapsed_time": "0:35:25", "remaining_time": "0:20:00", "throughput": 5609.88, "total_tokens": 11921152} +{"current_steps": 24215, "total_steps": 37885, "loss": 0.0001, "lr": 6.93841133083485e-07, "epoch": 3.1958558796357397, "percentage": 63.92, "elapsed_time": "0:35:25", "remaining_time": "0:19:59", "throughput": 5610.07, "total_tokens": 11923392} +{"current_steps": 24220, "total_steps": 37885, "loss": 0.0738, "lr": 6.934025899877167e-07, "epoch": 3.1965157714134884, "percentage": 63.93, "elapsed_time": "0:35:25", "remaining_time": "0:19:59", "throughput": 5610.4, "total_tokens": 11925952} +{"current_steps": 24225, "total_steps": 37885, "loss": 0.0, "lr": 6.929641119650286e-07, "epoch": 3.1971756631912367, "percentage": 63.94, "elapsed_time": "0:35:26", "remaining_time": "0:19:58", "throughput": 5610.77, "total_tokens": 11928576} +{"current_steps": 24230, "total_steps": 37885, "loss": 0.0002, "lr": 6.92525699108484e-07, "epoch": 3.197835554968985, "percentage": 63.96, "elapsed_time": "0:35:26", "remaining_time": "0:19:58", "throughput": 5610.99, "total_tokens": 11930880} +{"current_steps": 24235, "total_steps": 37885, "loss": 0.0001, "lr": 6.920873515111336e-07, "epoch": 3.1984954467467337, "percentage": 63.97, "elapsed_time": "0:35:26", "remaining_time": "0:19:57", "throughput": 5611.27, "total_tokens": 11933312} +{"current_steps": 24240, "total_steps": 37885, "loss": 0.0142, "lr": 6.916490692660127e-07, "epoch": 3.199155338524482, "percentage": 63.98, "elapsed_time": "0:35:27", "remaining_time": "0:19:57", "throughput": 5611.62, "total_tokens": 11935936} +{"current_steps": 24245, "total_steps": 37885, "loss": 0.043, "lr": 6.912108524661443e-07, "epoch": 3.1998152303022303, "percentage": 64.0, "elapsed_time": "0:35:27", "remaining_time": "0:19:56", "throughput": 5611.76, "total_tokens": 11938048} +{"current_steps": 24250, "total_steps": 37885, "loss": 0.0, "lr": 6.907727012045363e-07, "epoch": 3.200475122079979, "percentage": 64.01, "elapsed_time": "0:35:27", "remaining_time": "0:19:56", "throughput": 5612.03, "total_tokens": 11940480} +{"current_steps": 24255, "total_steps": 37885, "loss": 0.0372, "lr": 6.903346155741831e-07, "epoch": 3.2011350138577273, "percentage": 64.02, "elapsed_time": "0:35:27", "remaining_time": "0:19:55", "throughput": 5612.29, "total_tokens": 11942848} +{"current_steps": 24260, "total_steps": 37885, "loss": 0.0, "lr": 6.898965956680655e-07, "epoch": 3.201794905635476, "percentage": 64.04, "elapsed_time": "0:35:28", "remaining_time": "0:19:55", "throughput": 5612.57, "total_tokens": 11945280} +{"current_steps": 24265, "total_steps": 37885, "loss": 0.0002, "lr": 6.894586415791497e-07, "epoch": 3.2024547974132243, "percentage": 64.05, "elapsed_time": "0:35:28", "remaining_time": "0:19:54", "throughput": 5612.98, "total_tokens": 11948032} +{"current_steps": 24270, "total_steps": 37885, "loss": 0.0007, "lr": 6.890207534003884e-07, "epoch": 3.2031146891909725, "percentage": 64.06, "elapsed_time": "0:35:28", "remaining_time": "0:19:54", "throughput": 5613.26, "total_tokens": 11950464} +{"current_steps": 24275, "total_steps": 37885, "loss": 0.0, "lr": 6.885829312247207e-07, "epoch": 3.2037745809687213, "percentage": 64.08, "elapsed_time": "0:35:29", "remaining_time": "0:19:53", "throughput": 5613.67, "total_tokens": 11953216} +{"current_steps": 24280, "total_steps": 37885, "loss": 0.0, "lr": 6.881451751450702e-07, "epoch": 3.2044344727464695, "percentage": 64.09, "elapsed_time": "0:35:29", "remaining_time": "0:19:53", "throughput": 5613.88, "total_tokens": 11955520} +{"current_steps": 24285, "total_steps": 37885, "loss": 0.0308, "lr": 6.877074852543483e-07, "epoch": 3.205094364524218, "percentage": 64.1, "elapsed_time": "0:35:29", "remaining_time": "0:19:52", "throughput": 5614.19, "total_tokens": 11958016} +{"current_steps": 24290, "total_steps": 37885, "loss": 0.0, "lr": 6.872698616454511e-07, "epoch": 3.2057542563019665, "percentage": 64.12, "elapsed_time": "0:35:30", "remaining_time": "0:19:52", "throughput": 5614.5, "total_tokens": 11960512} +{"current_steps": 24295, "total_steps": 37885, "loss": 0.0282, "lr": 6.868323044112612e-07, "epoch": 3.206414148079715, "percentage": 64.13, "elapsed_time": "0:35:30", "remaining_time": "0:19:51", "throughput": 5614.78, "total_tokens": 11962944} +{"current_steps": 24300, "total_steps": 37885, "loss": 0.0, "lr": 6.863948136446468e-07, "epoch": 3.2070740398574635, "percentage": 64.14, "elapsed_time": "0:35:30", "remaining_time": "0:19:51", "throughput": 5615.17, "total_tokens": 11965632} +{"current_steps": 24305, "total_steps": 37885, "loss": 0.075, "lr": 6.859573894384625e-07, "epoch": 3.207733931635212, "percentage": 64.15, "elapsed_time": "0:35:31", "remaining_time": "0:19:50", "throughput": 5615.36, "total_tokens": 11967872} +{"current_steps": 24310, "total_steps": 37885, "loss": 0.0236, "lr": 6.855200318855483e-07, "epoch": 3.20839382341296, "percentage": 64.17, "elapsed_time": "0:35:31", "remaining_time": "0:19:50", "throughput": 5615.52, "total_tokens": 11970048} +{"current_steps": 24315, "total_steps": 37885, "loss": 0.0001, "lr": 6.850827410787295e-07, "epoch": 3.209053715190709, "percentage": 64.18, "elapsed_time": "0:35:31", "remaining_time": "0:19:49", "throughput": 5615.74, "total_tokens": 11972352} +{"current_steps": 24320, "total_steps": 37885, "loss": 0.0001, "lr": 6.846455171108187e-07, "epoch": 3.209713606968457, "percentage": 64.19, "elapsed_time": "0:35:32", "remaining_time": "0:19:49", "throughput": 5616.03, "total_tokens": 11974784} +{"current_steps": 24325, "total_steps": 37885, "loss": 0.0, "lr": 6.842083600746131e-07, "epoch": 3.210373498746206, "percentage": 64.21, "elapsed_time": "0:35:32", "remaining_time": "0:19:48", "throughput": 5616.35, "total_tokens": 11977344} +{"current_steps": 24330, "total_steps": 37885, "loss": 0.0, "lr": 6.837712700628967e-07, "epoch": 3.211033390523954, "percentage": 64.22, "elapsed_time": "0:35:32", "remaining_time": "0:19:48", "throughput": 5616.69, "total_tokens": 11979904} +{"current_steps": 24335, "total_steps": 37885, "loss": 0.0001, "lr": 6.833342471684383e-07, "epoch": 3.2116932823017024, "percentage": 64.23, "elapsed_time": "0:35:33", "remaining_time": "0:19:47", "throughput": 5616.9, "total_tokens": 11982208} +{"current_steps": 24340, "total_steps": 37885, "loss": 0.0238, "lr": 6.828972914839924e-07, "epoch": 3.212353174079451, "percentage": 64.25, "elapsed_time": "0:35:33", "remaining_time": "0:19:47", "throughput": 5617.26, "total_tokens": 11984832} +{"current_steps": 24345, "total_steps": 37885, "loss": 0.0863, "lr": 6.824604031023005e-07, "epoch": 3.2130130658571994, "percentage": 64.26, "elapsed_time": "0:35:33", "remaining_time": "0:19:46", "throughput": 5617.58, "total_tokens": 11987392} +{"current_steps": 24350, "total_steps": 37885, "loss": 0.0002, "lr": 6.820235821160881e-07, "epoch": 3.213672957634948, "percentage": 64.27, "elapsed_time": "0:35:34", "remaining_time": "0:19:46", "throughput": 5617.78, "total_tokens": 11989632} +{"current_steps": 24355, "total_steps": 37885, "loss": 0.0001, "lr": 6.815868286180683e-07, "epoch": 3.2143328494126964, "percentage": 64.29, "elapsed_time": "0:35:34", "remaining_time": "0:19:45", "throughput": 5618.04, "total_tokens": 11992064} +{"current_steps": 24360, "total_steps": 37885, "loss": 0.001, "lr": 6.811501427009383e-07, "epoch": 3.2149927411904446, "percentage": 64.3, "elapsed_time": "0:35:34", "remaining_time": "0:19:45", "throughput": 5618.4, "total_tokens": 11994688} +{"current_steps": 24365, "total_steps": 37885, "loss": 0.0003, "lr": 6.807135244573814e-07, "epoch": 3.2156526329681934, "percentage": 64.31, "elapsed_time": "0:35:35", "remaining_time": "0:19:44", "throughput": 5618.67, "total_tokens": 11997120} +{"current_steps": 24370, "total_steps": 37885, "loss": 0.0005, "lr": 6.802769739800669e-07, "epoch": 3.2163125247459416, "percentage": 64.33, "elapsed_time": "0:35:35", "remaining_time": "0:19:44", "throughput": 5618.97, "total_tokens": 11999616} +{"current_steps": 24375, "total_steps": 37885, "loss": 0.0, "lr": 6.798404913616491e-07, "epoch": 3.21697241652369, "percentage": 64.34, "elapsed_time": "0:35:35", "remaining_time": "0:19:43", "throughput": 5619.3, "total_tokens": 12002176} +{"current_steps": 24380, "total_steps": 37885, "loss": 0.0487, "lr": 6.794040766947693e-07, "epoch": 3.2176323083014386, "percentage": 64.35, "elapsed_time": "0:35:36", "remaining_time": "0:19:43", "throughput": 5619.57, "total_tokens": 12004608} +{"current_steps": 24385, "total_steps": 37885, "loss": 0.0, "lr": 6.789677300720522e-07, "epoch": 3.218292200079187, "percentage": 64.37, "elapsed_time": "0:35:36", "remaining_time": "0:19:42", "throughput": 5619.88, "total_tokens": 12007104} +{"current_steps": 24390, "total_steps": 37885, "loss": 0.0, "lr": 6.785314515861096e-07, "epoch": 3.2189520918569356, "percentage": 64.38, "elapsed_time": "0:35:36", "remaining_time": "0:19:42", "throughput": 5620.21, "total_tokens": 12009664} +{"current_steps": 24395, "total_steps": 37885, "loss": 0.0, "lr": 6.780952413295387e-07, "epoch": 3.219611983634684, "percentage": 64.39, "elapsed_time": "0:35:37", "remaining_time": "0:19:41", "throughput": 5620.46, "total_tokens": 12012032} +{"current_steps": 24400, "total_steps": 37885, "loss": 0.0001, "lr": 6.776590993949217e-07, "epoch": 3.220271875412432, "percentage": 64.41, "elapsed_time": "0:35:37", "remaining_time": "0:19:41", "throughput": 5620.62, "total_tokens": 12014208} +{"current_steps": 24405, "total_steps": 37885, "loss": 0.0004, "lr": 6.772230258748266e-07, "epoch": 3.220931767190181, "percentage": 64.42, "elapsed_time": "0:35:37", "remaining_time": "0:19:40", "throughput": 5620.86, "total_tokens": 12016576} +{"current_steps": 24410, "total_steps": 37885, "loss": 0.0006, "lr": 6.767870208618071e-07, "epoch": 3.221591658967929, "percentage": 64.43, "elapsed_time": "0:35:38", "remaining_time": "0:19:40", "throughput": 5621.24, "total_tokens": 12019264} +{"current_steps": 24415, "total_steps": 37885, "loss": 0.0213, "lr": 6.763510844484015e-07, "epoch": 3.2222515507456775, "percentage": 64.45, "elapsed_time": "0:35:38", "remaining_time": "0:19:39", "throughput": 5621.5, "total_tokens": 12021632} +{"current_steps": 24420, "total_steps": 37885, "loss": 0.0283, "lr": 6.759152167271349e-07, "epoch": 3.222911442523426, "percentage": 64.46, "elapsed_time": "0:35:38", "remaining_time": "0:19:39", "throughput": 5621.75, "total_tokens": 12024000} +{"current_steps": 24425, "total_steps": 37885, "loss": 0.0, "lr": 6.754794177905165e-07, "epoch": 3.2235713343011745, "percentage": 64.47, "elapsed_time": "0:35:39", "remaining_time": "0:19:38", "throughput": 5622.02, "total_tokens": 12026432} +{"current_steps": 24430, "total_steps": 37885, "loss": 0.0, "lr": 6.750436877310418e-07, "epoch": 3.224231226078923, "percentage": 64.48, "elapsed_time": "0:35:39", "remaining_time": "0:19:38", "throughput": 5622.21, "total_tokens": 12028672} +{"current_steps": 24435, "total_steps": 37885, "loss": 0.0004, "lr": 6.746080266411913e-07, "epoch": 3.2248911178566715, "percentage": 64.5, "elapsed_time": "0:35:39", "remaining_time": "0:19:37", "throughput": 5622.64, "total_tokens": 12031488} +{"current_steps": 24440, "total_steps": 37885, "loss": 0.0001, "lr": 6.741724346134306e-07, "epoch": 3.2255510096344198, "percentage": 64.51, "elapsed_time": "0:35:40", "remaining_time": "0:19:37", "throughput": 5622.9, "total_tokens": 12033920} +{"current_steps": 24445, "total_steps": 37885, "loss": 0.0044, "lr": 6.737369117402114e-07, "epoch": 3.2262109014121685, "percentage": 64.52, "elapsed_time": "0:35:40", "remaining_time": "0:19:36", "throughput": 5623.12, "total_tokens": 12036224} +{"current_steps": 24450, "total_steps": 37885, "loss": 0.115, "lr": 6.733014581139699e-07, "epoch": 3.2268707931899168, "percentage": 64.54, "elapsed_time": "0:35:40", "remaining_time": "0:19:36", "throughput": 5623.34, "total_tokens": 12038528} +{"current_steps": 24455, "total_steps": 37885, "loss": 0.0, "lr": 6.728660738271283e-07, "epoch": 3.2275306849676655, "percentage": 64.55, "elapsed_time": "0:35:41", "remaining_time": "0:19:35", "throughput": 5623.58, "total_tokens": 12040896} +{"current_steps": 24460, "total_steps": 37885, "loss": 0.0, "lr": 6.724307589720936e-07, "epoch": 3.2281905767454138, "percentage": 64.56, "elapsed_time": "0:35:41", "remaining_time": "0:19:35", "throughput": 5623.72, "total_tokens": 12043008} +{"current_steps": 24465, "total_steps": 37885, "loss": 0.0023, "lr": 6.719955136412582e-07, "epoch": 3.228850468523162, "percentage": 64.58, "elapsed_time": "0:35:41", "remaining_time": "0:19:34", "throughput": 5624.01, "total_tokens": 12045504} +{"current_steps": 24470, "total_steps": 37885, "loss": 0.0, "lr": 6.715603379269998e-07, "epoch": 3.2295103603009108, "percentage": 64.59, "elapsed_time": "0:35:42", "remaining_time": "0:19:34", "throughput": 5624.23, "total_tokens": 12047808} +{"current_steps": 24475, "total_steps": 37885, "loss": 0.0338, "lr": 6.711252319216814e-07, "epoch": 3.230170252078659, "percentage": 64.6, "elapsed_time": "0:35:42", "remaining_time": "0:19:33", "throughput": 5624.61, "total_tokens": 12050496} +{"current_steps": 24480, "total_steps": 37885, "loss": 0.0, "lr": 6.70690195717651e-07, "epoch": 3.2308301438564078, "percentage": 64.62, "elapsed_time": "0:35:42", "remaining_time": "0:19:33", "throughput": 5624.86, "total_tokens": 12052864} +{"current_steps": 24485, "total_steps": 37885, "loss": 0.0, "lr": 6.70255229407242e-07, "epoch": 3.231490035634156, "percentage": 64.63, "elapsed_time": "0:35:43", "remaining_time": "0:19:32", "throughput": 5625.02, "total_tokens": 12055040} +{"current_steps": 24490, "total_steps": 37885, "loss": 0.0, "lr": 6.698203330827722e-07, "epoch": 3.2321499274119043, "percentage": 64.64, "elapsed_time": "0:35:43", "remaining_time": "0:19:32", "throughput": 5625.37, "total_tokens": 12057664} +{"current_steps": 24495, "total_steps": 37885, "loss": 0.1253, "lr": 6.693855068365464e-07, "epoch": 3.232809819189653, "percentage": 64.66, "elapsed_time": "0:35:43", "remaining_time": "0:19:31", "throughput": 5625.51, "total_tokens": 12059776} +{"current_steps": 24500, "total_steps": 37885, "loss": 0.0, "lr": 6.689507507608518e-07, "epoch": 3.2334697109674013, "percentage": 64.67, "elapsed_time": "0:35:44", "remaining_time": "0:19:31", "throughput": 5625.84, "total_tokens": 12062336} +{"current_steps": 24505, "total_steps": 37885, "loss": 0.0, "lr": 6.685160649479638e-07, "epoch": 3.2341296027451496, "percentage": 64.68, "elapsed_time": "0:35:44", "remaining_time": "0:19:30", "throughput": 5626.0, "total_tokens": 12064512} +{"current_steps": 24510, "total_steps": 37885, "loss": 0.0, "lr": 6.680814494901406e-07, "epoch": 3.2347894945228983, "percentage": 64.7, "elapsed_time": "0:35:44", "remaining_time": "0:19:30", "throughput": 5626.33, "total_tokens": 12067072} +{"current_steps": 24515, "total_steps": 37885, "loss": 0.0, "lr": 6.676469044796258e-07, "epoch": 3.2354493863006466, "percentage": 64.71, "elapsed_time": "0:35:45", "remaining_time": "0:19:29", "throughput": 5626.54, "total_tokens": 12069376} +{"current_steps": 24520, "total_steps": 37885, "loss": 0.0001, "lr": 6.672124300086492e-07, "epoch": 3.2361092780783953, "percentage": 64.72, "elapsed_time": "0:35:45", "remaining_time": "0:19:29", "throughput": 5626.85, "total_tokens": 12071872} +{"current_steps": 24525, "total_steps": 37885, "loss": 0.0548, "lr": 6.667780261694239e-07, "epoch": 3.2367691698561436, "percentage": 64.74, "elapsed_time": "0:35:45", "remaining_time": "0:19:28", "throughput": 5627.17, "total_tokens": 12074432} +{"current_steps": 24530, "total_steps": 37885, "loss": 0.0, "lr": 6.663436930541502e-07, "epoch": 3.237429061633892, "percentage": 64.75, "elapsed_time": "0:35:46", "remaining_time": "0:19:28", "throughput": 5627.36, "total_tokens": 12076672} +{"current_steps": 24535, "total_steps": 37885, "loss": 0.0, "lr": 6.659094307550112e-07, "epoch": 3.2380889534116406, "percentage": 64.76, "elapsed_time": "0:35:46", "remaining_time": "0:19:27", "throughput": 5627.66, "total_tokens": 12079168} +{"current_steps": 24540, "total_steps": 37885, "loss": 0.0, "lr": 6.654752393641763e-07, "epoch": 3.238748845189389, "percentage": 64.77, "elapsed_time": "0:35:46", "remaining_time": "0:19:27", "throughput": 5627.99, "total_tokens": 12081728} +{"current_steps": 24545, "total_steps": 37885, "loss": 0.0, "lr": 6.650411189737993e-07, "epoch": 3.239408736967137, "percentage": 64.79, "elapsed_time": "0:35:47", "remaining_time": "0:19:26", "throughput": 5628.26, "total_tokens": 12084160} +{"current_steps": 24550, "total_steps": 37885, "loss": 0.028, "lr": 6.646070696760192e-07, "epoch": 3.240068628744886, "percentage": 64.8, "elapsed_time": "0:35:47", "remaining_time": "0:19:26", "throughput": 5628.56, "total_tokens": 12086656} +{"current_steps": 24555, "total_steps": 37885, "loss": 0.0, "lr": 6.6417309156296e-07, "epoch": 3.240728520522634, "percentage": 64.81, "elapsed_time": "0:35:47", "remaining_time": "0:19:25", "throughput": 5628.81, "total_tokens": 12089024} +{"current_steps": 24560, "total_steps": 37885, "loss": 0.0, "lr": 6.637391847267302e-07, "epoch": 3.241388412300383, "percentage": 64.83, "elapsed_time": "0:35:48", "remaining_time": "0:19:25", "throughput": 5629.08, "total_tokens": 12091456} +{"current_steps": 24565, "total_steps": 37885, "loss": 0.0, "lr": 6.633053492594232e-07, "epoch": 3.242048304078131, "percentage": 64.84, "elapsed_time": "0:35:48", "remaining_time": "0:19:24", "throughput": 5629.41, "total_tokens": 12094016} +{"current_steps": 24570, "total_steps": 37885, "loss": 0.0008, "lr": 6.628715852531179e-07, "epoch": 3.2427081958558794, "percentage": 64.85, "elapsed_time": "0:35:48", "remaining_time": "0:19:24", "throughput": 5629.68, "total_tokens": 12096448} +{"current_steps": 24575, "total_steps": 37885, "loss": 0.0, "lr": 6.624378927998773e-07, "epoch": 3.243368087633628, "percentage": 64.87, "elapsed_time": "0:35:49", "remaining_time": "0:19:23", "throughput": 5630.01, "total_tokens": 12099008} +{"current_steps": 24580, "total_steps": 37885, "loss": 0.0898, "lr": 6.620042719917495e-07, "epoch": 3.2440279794113764, "percentage": 64.88, "elapsed_time": "0:35:49", "remaining_time": "0:19:23", "throughput": 5630.2, "total_tokens": 12101248} +{"current_steps": 24585, "total_steps": 37885, "loss": 0.0, "lr": 6.615707229207674e-07, "epoch": 3.244687871189125, "percentage": 64.89, "elapsed_time": "0:35:49", "remaining_time": "0:19:22", "throughput": 5630.51, "total_tokens": 12103744} +{"current_steps": 24590, "total_steps": 37885, "loss": 0.0, "lr": 6.611372456789486e-07, "epoch": 3.2453477629668734, "percentage": 64.91, "elapsed_time": "0:35:50", "remaining_time": "0:19:22", "throughput": 5630.92, "total_tokens": 12106496} +{"current_steps": 24595, "total_steps": 37885, "loss": 0.0004, "lr": 6.607038403582956e-07, "epoch": 3.2460076547446217, "percentage": 64.92, "elapsed_time": "0:35:50", "remaining_time": "0:19:21", "throughput": 5631.19, "total_tokens": 12108928} +{"current_steps": 24600, "total_steps": 37885, "loss": 0.0, "lr": 6.602705070507954e-07, "epoch": 3.2466675465223704, "percentage": 64.93, "elapsed_time": "0:35:50", "remaining_time": "0:19:21", "throughput": 5631.52, "total_tokens": 12111488} +{"current_steps": 24605, "total_steps": 37885, "loss": 0.0, "lr": 6.598372458484202e-07, "epoch": 3.2473274383001187, "percentage": 64.95, "elapsed_time": "0:35:50", "remaining_time": "0:19:20", "throughput": 5631.87, "total_tokens": 12114112} +{"current_steps": 24610, "total_steps": 37885, "loss": 0.0266, "lr": 6.594040568431262e-07, "epoch": 3.2479873300778674, "percentage": 64.96, "elapsed_time": "0:35:51", "remaining_time": "0:19:20", "throughput": 5632.06, "total_tokens": 12116352} +{"current_steps": 24615, "total_steps": 37885, "loss": 0.0909, "lr": 6.589709401268546e-07, "epoch": 3.2486472218556157, "percentage": 64.97, "elapsed_time": "0:35:51", "remaining_time": "0:19:19", "throughput": 5632.42, "total_tokens": 12118976} +{"current_steps": 24620, "total_steps": 37885, "loss": 0.0822, "lr": 6.585378957915315e-07, "epoch": 3.249307113633364, "percentage": 64.99, "elapsed_time": "0:35:51", "remaining_time": "0:19:19", "throughput": 5632.61, "total_tokens": 12121216} +{"current_steps": 24625, "total_steps": 37885, "loss": 0.0, "lr": 6.581049239290672e-07, "epoch": 3.2499670054111127, "percentage": 65.0, "elapsed_time": "0:35:52", "remaining_time": "0:19:18", "throughput": 5632.91, "total_tokens": 12123712} +{"current_steps": 24630, "total_steps": 37885, "loss": 0.0, "lr": 6.576720246313572e-07, "epoch": 3.250626897188861, "percentage": 65.01, "elapsed_time": "0:35:52", "remaining_time": "0:19:18", "throughput": 5633.13, "total_tokens": 12126016} +{"current_steps": 24635, "total_steps": 37885, "loss": 0.0352, "lr": 6.57239197990281e-07, "epoch": 3.2512867889666097, "percentage": 65.03, "elapsed_time": "0:35:52", "remaining_time": "0:19:17", "throughput": 5633.4, "total_tokens": 12128448} +{"current_steps": 24635, "total_steps": 37885, "eval_loss": 0.18092882633209229, "epoch": 3.2512867889666097, "percentage": 65.03, "elapsed_time": "0:36:00", "remaining_time": "0:19:22", "throughput": 5612.96, "total_tokens": 12128448} +{"current_steps": 24640, "total_steps": 37885, "loss": 0.0434, "lr": 6.568064440977028e-07, "epoch": 3.251946680744358, "percentage": 65.04, "elapsed_time": "0:36:35", "remaining_time": "0:19:40", "throughput": 5524.63, "total_tokens": 12130880} +{"current_steps": 24645, "total_steps": 37885, "loss": 0.0001, "lr": 6.563737630454719e-07, "epoch": 3.2526065725221063, "percentage": 65.05, "elapsed_time": "0:36:36", "remaining_time": "0:19:39", "throughput": 5524.86, "total_tokens": 12133248} +{"current_steps": 24650, "total_steps": 37885, "loss": 0.0, "lr": 6.559411549254211e-07, "epoch": 3.253266464299855, "percentage": 65.07, "elapsed_time": "0:36:36", "remaining_time": "0:19:39", "throughput": 5525.04, "total_tokens": 12135488} +{"current_steps": 24655, "total_steps": 37885, "loss": 0.0661, "lr": 6.55508619829369e-07, "epoch": 3.2539263560776033, "percentage": 65.08, "elapsed_time": "0:36:36", "remaining_time": "0:19:38", "throughput": 5525.31, "total_tokens": 12137920} +{"current_steps": 24660, "total_steps": 37885, "loss": 0.0001, "lr": 6.550761578491175e-07, "epoch": 3.2545862478553516, "percentage": 65.09, "elapsed_time": "0:36:37", "remaining_time": "0:19:38", "throughput": 5525.6, "total_tokens": 12140416} +{"current_steps": 24665, "total_steps": 37885, "loss": 0.0338, "lr": 6.546437690764539e-07, "epoch": 3.2552461396331003, "percentage": 65.1, "elapsed_time": "0:36:37", "remaining_time": "0:19:37", "throughput": 5525.94, "total_tokens": 12143040} +{"current_steps": 24670, "total_steps": 37885, "loss": 0.0002, "lr": 6.542114536031498e-07, "epoch": 3.2559060314108486, "percentage": 65.12, "elapsed_time": "0:36:37", "remaining_time": "0:19:37", "throughput": 5526.12, "total_tokens": 12145280} +{"current_steps": 24675, "total_steps": 37885, "loss": 0.0611, "lr": 6.537792115209599e-07, "epoch": 3.256565923188597, "percentage": 65.13, "elapsed_time": "0:36:38", "remaining_time": "0:19:36", "throughput": 5526.4, "total_tokens": 12147776} +{"current_steps": 24680, "total_steps": 37885, "loss": 0.0, "lr": 6.533470429216258e-07, "epoch": 3.2572258149663456, "percentage": 65.14, "elapsed_time": "0:36:38", "remaining_time": "0:19:36", "throughput": 5526.69, "total_tokens": 12150272} +{"current_steps": 24685, "total_steps": 37885, "loss": 0.0004, "lr": 6.529149478968709e-07, "epoch": 3.257885706744094, "percentage": 65.16, "elapsed_time": "0:36:38", "remaining_time": "0:19:35", "throughput": 5526.99, "total_tokens": 12152768} +{"current_steps": 24690, "total_steps": 37885, "loss": 0.0018, "lr": 6.524829265384058e-07, "epoch": 3.2585455985218426, "percentage": 65.17, "elapsed_time": "0:36:39", "remaining_time": "0:19:35", "throughput": 5527.2, "total_tokens": 12155072} +{"current_steps": 24695, "total_steps": 37885, "loss": 0.0355, "lr": 6.520509789379227e-07, "epoch": 3.259205490299591, "percentage": 65.18, "elapsed_time": "0:36:39", "remaining_time": "0:19:34", "throughput": 5527.41, "total_tokens": 12157376} +{"current_steps": 24700, "total_steps": 37885, "loss": 0.0019, "lr": 6.516191051870992e-07, "epoch": 3.259865382077339, "percentage": 65.2, "elapsed_time": "0:36:39", "remaining_time": "0:19:34", "throughput": 5527.58, "total_tokens": 12159616} +{"current_steps": 24705, "total_steps": 37885, "loss": 0.0296, "lr": 6.511873053775985e-07, "epoch": 3.260525273855088, "percentage": 65.21, "elapsed_time": "0:36:40", "remaining_time": "0:19:33", "throughput": 5527.78, "total_tokens": 12161920} +{"current_steps": 24710, "total_steps": 37885, "loss": 0.0564, "lr": 6.507555796010658e-07, "epoch": 3.261185165632836, "percentage": 65.22, "elapsed_time": "0:36:40", "remaining_time": "0:19:33", "throughput": 5527.95, "total_tokens": 12164160} +{"current_steps": 24715, "total_steps": 37885, "loss": 0.0615, "lr": 6.503239279491328e-07, "epoch": 3.261845057410585, "percentage": 65.24, "elapsed_time": "0:36:40", "remaining_time": "0:19:32", "throughput": 5528.17, "total_tokens": 12166464} +{"current_steps": 24720, "total_steps": 37885, "loss": 0.0311, "lr": 6.498923505134138e-07, "epoch": 3.262504949188333, "percentage": 65.25, "elapsed_time": "0:36:41", "remaining_time": "0:19:32", "throughput": 5528.47, "total_tokens": 12168960} +{"current_steps": 24725, "total_steps": 37885, "loss": 0.0202, "lr": 6.494608473855079e-07, "epoch": 3.2631648409660814, "percentage": 65.26, "elapsed_time": "0:36:41", "remaining_time": "0:19:31", "throughput": 5528.85, "total_tokens": 12171648} +{"current_steps": 24730, "total_steps": 37885, "loss": 0.0, "lr": 6.490294186569989e-07, "epoch": 3.26382473274383, "percentage": 65.28, "elapsed_time": "0:36:41", "remaining_time": "0:19:31", "throughput": 5529.26, "total_tokens": 12174400} +{"current_steps": 24735, "total_steps": 37885, "loss": 0.0045, "lr": 6.485980644194541e-07, "epoch": 3.2644846245215784, "percentage": 65.29, "elapsed_time": "0:36:42", "remaining_time": "0:19:30", "throughput": 5529.48, "total_tokens": 12176704} +{"current_steps": 24740, "total_steps": 37885, "loss": 0.0608, "lr": 6.481667847644256e-07, "epoch": 3.265144516299327, "percentage": 65.3, "elapsed_time": "0:36:42", "remaining_time": "0:19:30", "throughput": 5529.71, "total_tokens": 12179008} +{"current_steps": 24745, "total_steps": 37885, "loss": 0.0, "lr": 6.477355797834494e-07, "epoch": 3.2658044080770754, "percentage": 65.32, "elapsed_time": "0:36:42", "remaining_time": "0:19:29", "throughput": 5530.07, "total_tokens": 12181632} +{"current_steps": 24750, "total_steps": 37885, "loss": 0.0, "lr": 6.473044495680451e-07, "epoch": 3.2664642998548237, "percentage": 65.33, "elapsed_time": "0:36:43", "remaining_time": "0:19:29", "throughput": 5530.33, "total_tokens": 12184000} +{"current_steps": 24755, "total_steps": 37885, "loss": 0.0241, "lr": 6.468733942097178e-07, "epoch": 3.2671241916325724, "percentage": 65.34, "elapsed_time": "0:36:43", "remaining_time": "0:19:28", "throughput": 5530.58, "total_tokens": 12186368} +{"current_steps": 24760, "total_steps": 37885, "loss": 0.0, "lr": 6.464424137999551e-07, "epoch": 3.2677840834103207, "percentage": 65.36, "elapsed_time": "0:36:43", "remaining_time": "0:19:28", "throughput": 5530.81, "total_tokens": 12188672} +{"current_steps": 24765, "total_steps": 37885, "loss": 0.0017, "lr": 6.4601150843023e-07, "epoch": 3.2684439751880694, "percentage": 65.37, "elapsed_time": "0:36:44", "remaining_time": "0:19:27", "throughput": 5530.95, "total_tokens": 12190784} +{"current_steps": 24770, "total_steps": 37885, "loss": 0.0001, "lr": 6.455806781919988e-07, "epoch": 3.2691038669658177, "percentage": 65.38, "elapsed_time": "0:36:44", "remaining_time": "0:19:27", "throughput": 5531.23, "total_tokens": 12193216} +{"current_steps": 24775, "total_steps": 37885, "loss": 0.0, "lr": 6.451499231767021e-07, "epoch": 3.269763758743566, "percentage": 65.4, "elapsed_time": "0:36:44", "remaining_time": "0:19:26", "throughput": 5531.53, "total_tokens": 12195712} +{"current_steps": 24780, "total_steps": 37885, "loss": 0.0023, "lr": 6.447192434757647e-07, "epoch": 3.2704236505213147, "percentage": 65.41, "elapsed_time": "0:36:45", "remaining_time": "0:19:26", "throughput": 5531.75, "total_tokens": 12198016} +{"current_steps": 24785, "total_steps": 37885, "loss": 0.0, "lr": 6.442886391805948e-07, "epoch": 3.271083542299063, "percentage": 65.42, "elapsed_time": "0:36:45", "remaining_time": "0:19:25", "throughput": 5532.06, "total_tokens": 12200512} +{"current_steps": 24790, "total_steps": 37885, "loss": 0.0002, "lr": 6.438581103825858e-07, "epoch": 3.2717434340768112, "percentage": 65.43, "elapsed_time": "0:36:45", "remaining_time": "0:19:25", "throughput": 5532.44, "total_tokens": 12203200} +{"current_steps": 24795, "total_steps": 37885, "loss": 0.0, "lr": 6.434276571731139e-07, "epoch": 3.27240332585456, "percentage": 65.45, "elapsed_time": "0:36:46", "remaining_time": "0:19:24", "throughput": 5532.83, "total_tokens": 12205888} +{"current_steps": 24800, "total_steps": 37885, "loss": 0.0006, "lr": 6.429972796435392e-07, "epoch": 3.2730632176323082, "percentage": 65.46, "elapsed_time": "0:36:46", "remaining_time": "0:19:24", "throughput": 5533.16, "total_tokens": 12208448} +{"current_steps": 24805, "total_steps": 37885, "loss": 0.0005, "lr": 6.425669778852072e-07, "epoch": 3.2737231094100565, "percentage": 65.47, "elapsed_time": "0:36:46", "remaining_time": "0:19:23", "throughput": 5533.4, "total_tokens": 12210816} +{"current_steps": 24810, "total_steps": 37885, "loss": 0.0, "lr": 6.421367519894454e-07, "epoch": 3.2743830011878052, "percentage": 65.49, "elapsed_time": "0:36:47", "remaining_time": "0:19:23", "throughput": 5533.73, "total_tokens": 12213376} +{"current_steps": 24815, "total_steps": 37885, "loss": 0.0019, "lr": 6.417066020475669e-07, "epoch": 3.2750428929655535, "percentage": 65.5, "elapsed_time": "0:36:47", "remaining_time": "0:19:22", "throughput": 5534.14, "total_tokens": 12216128} +{"current_steps": 24820, "total_steps": 37885, "loss": 0.0002, "lr": 6.412765281508677e-07, "epoch": 3.2757027847433022, "percentage": 65.51, "elapsed_time": "0:36:47", "remaining_time": "0:19:22", "throughput": 5534.36, "total_tokens": 12218432} +{"current_steps": 24825, "total_steps": 37885, "loss": 0.0, "lr": 6.408465303906271e-07, "epoch": 3.2763626765210505, "percentage": 65.53, "elapsed_time": "0:36:48", "remaining_time": "0:19:21", "throughput": 5534.82, "total_tokens": 12221312} +{"current_steps": 24830, "total_steps": 37885, "loss": 0.0, "lr": 6.404166088581102e-07, "epoch": 3.277022568298799, "percentage": 65.54, "elapsed_time": "0:36:48", "remaining_time": "0:19:21", "throughput": 5535.07, "total_tokens": 12223680} +{"current_steps": 24835, "total_steps": 37885, "loss": 0.0487, "lr": 6.399867636445637e-07, "epoch": 3.2776824600765475, "percentage": 65.55, "elapsed_time": "0:36:48", "remaining_time": "0:19:20", "throughput": 5535.45, "total_tokens": 12226368} +{"current_steps": 24840, "total_steps": 37885, "loss": 0.0002, "lr": 6.395569948412198e-07, "epoch": 3.278342351854296, "percentage": 65.57, "elapsed_time": "0:36:49", "remaining_time": "0:19:20", "throughput": 5535.7, "total_tokens": 12228736} +{"current_steps": 24845, "total_steps": 37885, "loss": 0.0559, "lr": 6.39127302539294e-07, "epoch": 3.2790022436320445, "percentage": 65.58, "elapsed_time": "0:36:49", "remaining_time": "0:19:19", "throughput": 5536.05, "total_tokens": 12231360} +{"current_steps": 24850, "total_steps": 37885, "loss": 0.2746, "lr": 6.386976868299844e-07, "epoch": 3.279662135409793, "percentage": 65.59, "elapsed_time": "0:36:49", "remaining_time": "0:19:19", "throughput": 5536.49, "total_tokens": 12234176} +{"current_steps": 24855, "total_steps": 37885, "loss": 0.0001, "lr": 6.382681478044749e-07, "epoch": 3.280322027187541, "percentage": 65.61, "elapsed_time": "0:36:50", "remaining_time": "0:19:18", "throughput": 5536.75, "total_tokens": 12236544} +{"current_steps": 24860, "total_steps": 37885, "loss": 0.0001, "lr": 6.378386855539311e-07, "epoch": 3.28098191896529, "percentage": 65.62, "elapsed_time": "0:36:50", "remaining_time": "0:19:18", "throughput": 5537.06, "total_tokens": 12239040} +{"current_steps": 24865, "total_steps": 37885, "loss": 0.08, "lr": 6.374093001695042e-07, "epoch": 3.281641810743038, "percentage": 65.63, "elapsed_time": "0:36:50", "remaining_time": "0:19:17", "throughput": 5537.31, "total_tokens": 12241408} +{"current_steps": 24870, "total_steps": 37885, "loss": 0.0337, "lr": 6.369799917423277e-07, "epoch": 3.282301702520787, "percentage": 65.65, "elapsed_time": "0:36:51", "remaining_time": "0:19:17", "throughput": 5537.59, "total_tokens": 12243840} +{"current_steps": 24875, "total_steps": 37885, "loss": 0.0001, "lr": 6.365507603635188e-07, "epoch": 3.282961594298535, "percentage": 65.66, "elapsed_time": "0:36:51", "remaining_time": "0:19:16", "throughput": 5537.76, "total_tokens": 12246016} +{"current_steps": 24880, "total_steps": 37885, "loss": 0.0006, "lr": 6.361216061241792e-07, "epoch": 3.2836214860762833, "percentage": 65.67, "elapsed_time": "0:36:51", "remaining_time": "0:19:16", "throughput": 5537.99, "total_tokens": 12248320} +{"current_steps": 24885, "total_steps": 37885, "loss": 0.0292, "lr": 6.356925291153936e-07, "epoch": 3.284281377854032, "percentage": 65.69, "elapsed_time": "0:36:52", "remaining_time": "0:19:15", "throughput": 5538.39, "total_tokens": 12251072} +{"current_steps": 24890, "total_steps": 37885, "loss": 0.0011, "lr": 6.352635294282309e-07, "epoch": 3.2849412696317803, "percentage": 65.7, "elapsed_time": "0:36:52", "remaining_time": "0:19:15", "throughput": 5538.72, "total_tokens": 12253632} +{"current_steps": 24895, "total_steps": 37885, "loss": 0.0001, "lr": 6.348346071537427e-07, "epoch": 3.285601161409529, "percentage": 65.71, "elapsed_time": "0:36:52", "remaining_time": "0:19:14", "throughput": 5539.03, "total_tokens": 12256128} +{"current_steps": 24900, "total_steps": 37885, "loss": 0.0, "lr": 6.344057623829648e-07, "epoch": 3.2862610531872773, "percentage": 65.73, "elapsed_time": "0:36:53", "remaining_time": "0:19:14", "throughput": 5539.36, "total_tokens": 12258688} +{"current_steps": 24905, "total_steps": 37885, "loss": 0.0, "lr": 6.339769952069165e-07, "epoch": 3.2869209449650256, "percentage": 65.74, "elapsed_time": "0:36:53", "remaining_time": "0:19:13", "throughput": 5539.69, "total_tokens": 12261312} +{"current_steps": 24910, "total_steps": 37885, "loss": 0.0, "lr": 6.335483057166002e-07, "epoch": 3.2875808367427743, "percentage": 65.75, "elapsed_time": "0:36:53", "remaining_time": "0:19:13", "throughput": 5539.91, "total_tokens": 12263616} +{"current_steps": 24915, "total_steps": 37885, "loss": 0.0, "lr": 6.331196940030026e-07, "epoch": 3.2882407285205226, "percentage": 65.76, "elapsed_time": "0:36:54", "remaining_time": "0:19:12", "throughput": 5540.29, "total_tokens": 12266304} +{"current_steps": 24920, "total_steps": 37885, "loss": 0.0, "lr": 6.326911601570933e-07, "epoch": 3.288900620298271, "percentage": 65.78, "elapsed_time": "0:36:54", "remaining_time": "0:19:12", "throughput": 5540.5, "total_tokens": 12268608} +{"current_steps": 24925, "total_steps": 37885, "loss": 0.0, "lr": 6.322627042698251e-07, "epoch": 3.2895605120760196, "percentage": 65.79, "elapsed_time": "0:36:54", "remaining_time": "0:19:11", "throughput": 5540.88, "total_tokens": 12271296} +{"current_steps": 24930, "total_steps": 37885, "loss": 0.0, "lr": 6.318343264321352e-07, "epoch": 3.290220403853768, "percentage": 65.8, "elapsed_time": "0:36:55", "remaining_time": "0:19:11", "throughput": 5541.12, "total_tokens": 12273664} +{"current_steps": 24935, "total_steps": 37885, "loss": 0.0, "lr": 6.314060267349432e-07, "epoch": 3.290880295631516, "percentage": 65.82, "elapsed_time": "0:36:55", "remaining_time": "0:19:10", "throughput": 5541.44, "total_tokens": 12276224} +{"current_steps": 24940, "total_steps": 37885, "loss": 0.0551, "lr": 6.309778052691532e-07, "epoch": 3.291540187409265, "percentage": 65.83, "elapsed_time": "0:36:55", "remaining_time": "0:19:10", "throughput": 5541.7, "total_tokens": 12278656} +{"current_steps": 24945, "total_steps": 37885, "loss": 0.0, "lr": 6.305496621256516e-07, "epoch": 3.292200079187013, "percentage": 65.84, "elapsed_time": "0:36:56", "remaining_time": "0:19:09", "throughput": 5541.92, "total_tokens": 12280960} +{"current_steps": 24950, "total_steps": 37885, "loss": 0.0, "lr": 6.30121597395309e-07, "epoch": 3.292859970964762, "percentage": 65.86, "elapsed_time": "0:36:56", "remaining_time": "0:19:09", "throughput": 5542.34, "total_tokens": 12283776} +{"current_steps": 24955, "total_steps": 37885, "loss": 0.0, "lr": 6.296936111689789e-07, "epoch": 3.29351986274251, "percentage": 65.87, "elapsed_time": "0:36:56", "remaining_time": "0:19:08", "throughput": 5542.5, "total_tokens": 12285952} +{"current_steps": 24960, "total_steps": 37885, "loss": 0.0, "lr": 6.292657035374981e-07, "epoch": 3.2941797545202585, "percentage": 65.88, "elapsed_time": "0:36:57", "remaining_time": "0:19:08", "throughput": 5542.85, "total_tokens": 12288576} +{"current_steps": 24965, "total_steps": 37885, "loss": 0.028, "lr": 6.288378745916873e-07, "epoch": 3.294839646298007, "percentage": 65.9, "elapsed_time": "0:36:57", "remaining_time": "0:19:07", "throughput": 5543.19, "total_tokens": 12291200} +{"current_steps": 24970, "total_steps": 37885, "loss": 0.0, "lr": 6.284101244223497e-07, "epoch": 3.2954995380757555, "percentage": 65.91, "elapsed_time": "0:36:57", "remaining_time": "0:19:07", "throughput": 5543.42, "total_tokens": 12293568} +{"current_steps": 24975, "total_steps": 37885, "loss": 0.0001, "lr": 6.279824531202725e-07, "epoch": 3.296159429853504, "percentage": 65.92, "elapsed_time": "0:36:58", "remaining_time": "0:19:06", "throughput": 5543.71, "total_tokens": 12296064} +{"current_steps": 24980, "total_steps": 37885, "loss": 0.0266, "lr": 6.275548607762255e-07, "epoch": 3.2968193216312525, "percentage": 65.94, "elapsed_time": "0:36:58", "remaining_time": "0:19:06", "throughput": 5544.05, "total_tokens": 12298688} +{"current_steps": 24985, "total_steps": 37885, "loss": 0.0001, "lr": 6.271273474809624e-07, "epoch": 3.2974792134090007, "percentage": 65.95, "elapsed_time": "0:36:58", "remaining_time": "0:19:05", "throughput": 5544.25, "total_tokens": 12300992} +{"current_steps": 24990, "total_steps": 37885, "loss": 0.0, "lr": 6.266999133252196e-07, "epoch": 3.2981391051867495, "percentage": 65.96, "elapsed_time": "0:36:59", "remaining_time": "0:19:05", "throughput": 5544.62, "total_tokens": 12303680} +{"current_steps": 24995, "total_steps": 37885, "loss": 0.0, "lr": 6.262725583997169e-07, "epoch": 3.2987989969644977, "percentage": 65.98, "elapsed_time": "0:36:59", "remaining_time": "0:19:04", "throughput": 5544.72, "total_tokens": 12305728} +{"current_steps": 25000, "total_steps": 37885, "loss": 0.0, "lr": 6.258452827951576e-07, "epoch": 3.2994588887422465, "percentage": 65.99, "elapsed_time": "0:36:59", "remaining_time": "0:19:04", "throughput": 5544.95, "total_tokens": 12308096} +{"current_steps": 25005, "total_steps": 37885, "loss": 0.0004, "lr": 6.254180866022278e-07, "epoch": 3.3001187805199947, "percentage": 66.0, "elapsed_time": "0:37:00", "remaining_time": "0:19:03", "throughput": 5545.27, "total_tokens": 12310656} +{"current_steps": 25010, "total_steps": 37885, "loss": 0.0009, "lr": 6.249909699115958e-07, "epoch": 3.300778672297743, "percentage": 66.02, "elapsed_time": "0:37:00", "remaining_time": "0:19:03", "throughput": 5545.73, "total_tokens": 12313600} +{"current_steps": 25015, "total_steps": 37885, "loss": 0.0266, "lr": 6.245639328139156e-07, "epoch": 3.3014385640754917, "percentage": 66.03, "elapsed_time": "0:37:00", "remaining_time": "0:19:02", "throughput": 5545.91, "total_tokens": 12315840} +{"current_steps": 25020, "total_steps": 37885, "loss": 0.0, "lr": 6.241369753998213e-07, "epoch": 3.30209845585324, "percentage": 66.04, "elapsed_time": "0:37:01", "remaining_time": "0:19:02", "throughput": 5546.39, "total_tokens": 12318784} +{"current_steps": 25025, "total_steps": 37885, "loss": 0.0, "lr": 6.23710097759933e-07, "epoch": 3.3027583476309887, "percentage": 66.06, "elapsed_time": "0:37:01", "remaining_time": "0:19:01", "throughput": 5546.61, "total_tokens": 12321152} +{"current_steps": 25030, "total_steps": 37885, "loss": 0.0045, "lr": 6.232832999848511e-07, "epoch": 3.303418239408737, "percentage": 66.07, "elapsed_time": "0:37:01", "remaining_time": "0:19:01", "throughput": 5546.93, "total_tokens": 12323712} +{"current_steps": 25035, "total_steps": 37885, "loss": 0.0, "lr": 6.228565821651606e-07, "epoch": 3.3040781311864853, "percentage": 66.08, "elapsed_time": "0:37:02", "remaining_time": "0:19:00", "throughput": 5547.24, "total_tokens": 12326272} +{"current_steps": 25040, "total_steps": 37885, "loss": 0.0, "lr": 6.224299443914301e-07, "epoch": 3.304738022964234, "percentage": 66.09, "elapsed_time": "0:37:02", "remaining_time": "0:19:00", "throughput": 5547.58, "total_tokens": 12328896} +{"current_steps": 25045, "total_steps": 37885, "loss": 0.0, "lr": 6.22003386754209e-07, "epoch": 3.3053979147419823, "percentage": 66.11, "elapsed_time": "0:37:02", "remaining_time": "0:18:59", "throughput": 5547.84, "total_tokens": 12331328} +{"current_steps": 25050, "total_steps": 37885, "loss": 0.0323, "lr": 6.215769093440325e-07, "epoch": 3.3060578065197306, "percentage": 66.12, "elapsed_time": "0:37:03", "remaining_time": "0:18:59", "throughput": 5548.0, "total_tokens": 12333568} +{"current_steps": 25055, "total_steps": 37885, "loss": 0.0003, "lr": 6.211505122514165e-07, "epoch": 3.3067176982974793, "percentage": 66.13, "elapsed_time": "0:37:03", "remaining_time": "0:18:58", "throughput": 5548.28, "total_tokens": 12336064} +{"current_steps": 25060, "total_steps": 37885, "loss": 0.0002, "lr": 6.207241955668605e-07, "epoch": 3.3073775900752276, "percentage": 66.15, "elapsed_time": "0:37:03", "remaining_time": "0:18:58", "throughput": 5548.65, "total_tokens": 12338752} +{"current_steps": 25065, "total_steps": 37885, "loss": 0.0, "lr": 6.202979593808478e-07, "epoch": 3.3080374818529763, "percentage": 66.16, "elapsed_time": "0:37:04", "remaining_time": "0:18:57", "throughput": 5548.9, "total_tokens": 12341184} +{"current_steps": 25070, "total_steps": 37885, "loss": 0.0533, "lr": 6.198718037838435e-07, "epoch": 3.3086973736307246, "percentage": 66.17, "elapsed_time": "0:37:04", "remaining_time": "0:18:57", "throughput": 5549.1, "total_tokens": 12343488} +{"current_steps": 25075, "total_steps": 37885, "loss": 0.0465, "lr": 6.194457288662963e-07, "epoch": 3.309357265408473, "percentage": 66.19, "elapsed_time": "0:37:04", "remaining_time": "0:18:56", "throughput": 5549.46, "total_tokens": 12346176} +{"current_steps": 25080, "total_steps": 37885, "loss": 0.0, "lr": 6.190197347186374e-07, "epoch": 3.3100171571862216, "percentage": 66.2, "elapsed_time": "0:37:05", "remaining_time": "0:18:56", "throughput": 5549.68, "total_tokens": 12348480} +{"current_steps": 25085, "total_steps": 37885, "loss": 0.0001, "lr": 6.185938214312808e-07, "epoch": 3.31067704896397, "percentage": 66.21, "elapsed_time": "0:37:05", "remaining_time": "0:18:55", "throughput": 5549.9, "total_tokens": 12350848} +{"current_steps": 25090, "total_steps": 37885, "loss": 0.0, "lr": 6.181679890946238e-07, "epoch": 3.311336940741718, "percentage": 66.23, "elapsed_time": "0:37:05", "remaining_time": "0:18:55", "throughput": 5550.23, "total_tokens": 12353472} +{"current_steps": 25095, "total_steps": 37885, "loss": 0.0, "lr": 6.17742237799046e-07, "epoch": 3.311996832519467, "percentage": 66.24, "elapsed_time": "0:37:06", "remaining_time": "0:18:54", "throughput": 5550.61, "total_tokens": 12356224} +{"current_steps": 25100, "total_steps": 37885, "loss": 0.0049, "lr": 6.173165676349102e-07, "epoch": 3.312656724297215, "percentage": 66.25, "elapsed_time": "0:37:06", "remaining_time": "0:18:54", "throughput": 5550.93, "total_tokens": 12358784} +{"current_steps": 25105, "total_steps": 37885, "loss": 0.0, "lr": 6.168909786925619e-07, "epoch": 3.313316616074964, "percentage": 66.27, "elapsed_time": "0:37:06", "remaining_time": "0:18:53", "throughput": 5551.14, "total_tokens": 12361088} +{"current_steps": 25110, "total_steps": 37885, "loss": 0.0, "lr": 6.164654710623289e-07, "epoch": 3.313976507852712, "percentage": 66.28, "elapsed_time": "0:37:07", "remaining_time": "0:18:53", "throughput": 5551.37, "total_tokens": 12363456} +{"current_steps": 25115, "total_steps": 37885, "loss": 0.0001, "lr": 6.160400448345224e-07, "epoch": 3.3146363996304604, "percentage": 66.29, "elapsed_time": "0:37:07", "remaining_time": "0:18:52", "throughput": 5551.68, "total_tokens": 12366016} +{"current_steps": 25120, "total_steps": 37885, "loss": 0.0005, "lr": 6.156147000994358e-07, "epoch": 3.315296291408209, "percentage": 66.31, "elapsed_time": "0:37:07", "remaining_time": "0:18:52", "throughput": 5552.0, "total_tokens": 12368576} +{"current_steps": 25125, "total_steps": 37885, "loss": 0.0002, "lr": 6.151894369473459e-07, "epoch": 3.3159561831859574, "percentage": 66.32, "elapsed_time": "0:37:08", "remaining_time": "0:18:51", "throughput": 5552.27, "total_tokens": 12371008} +{"current_steps": 25130, "total_steps": 37885, "loss": 0.0, "lr": 6.147642554685112e-07, "epoch": 3.316616074963706, "percentage": 66.33, "elapsed_time": "0:37:08", "remaining_time": "0:18:51", "throughput": 5552.49, "total_tokens": 12373376} +{"current_steps": 25135, "total_steps": 37885, "loss": 0.0, "lr": 6.143391557531738e-07, "epoch": 3.3172759667414544, "percentage": 66.35, "elapsed_time": "0:37:08", "remaining_time": "0:18:50", "throughput": 5552.84, "total_tokens": 12376064} +{"current_steps": 25140, "total_steps": 37885, "loss": 0.0061, "lr": 6.139141378915578e-07, "epoch": 3.3179358585192027, "percentage": 66.36, "elapsed_time": "0:37:09", "remaining_time": "0:18:50", "throughput": 5553.13, "total_tokens": 12378560} +{"current_steps": 25145, "total_steps": 37885, "loss": 0.0, "lr": 6.1348920197387e-07, "epoch": 3.3185957502969514, "percentage": 66.37, "elapsed_time": "0:37:09", "remaining_time": "0:18:49", "throughput": 5553.34, "total_tokens": 12380928} +{"current_steps": 25150, "total_steps": 37885, "loss": 0.0002, "lr": 6.130643480903005e-07, "epoch": 3.3192556420746997, "percentage": 66.39, "elapsed_time": "0:37:09", "remaining_time": "0:18:49", "throughput": 5553.6, "total_tokens": 12383360} +{"current_steps": 25155, "total_steps": 37885, "loss": 0.0082, "lr": 6.126395763310213e-07, "epoch": 3.3199155338524484, "percentage": 66.4, "elapsed_time": "0:37:10", "remaining_time": "0:18:48", "throughput": 5553.92, "total_tokens": 12385920} +{"current_steps": 25160, "total_steps": 37885, "loss": 0.0308, "lr": 6.122148867861864e-07, "epoch": 3.3205754256301967, "percentage": 66.41, "elapsed_time": "0:37:10", "remaining_time": "0:18:48", "throughput": 5554.21, "total_tokens": 12388416} +{"current_steps": 25165, "total_steps": 37885, "loss": 0.02, "lr": 6.117902795459342e-07, "epoch": 3.321235317407945, "percentage": 66.42, "elapsed_time": "0:37:10", "remaining_time": "0:18:47", "throughput": 5554.51, "total_tokens": 12390976} +{"current_steps": 25170, "total_steps": 37885, "loss": 0.0, "lr": 6.113657547003834e-07, "epoch": 3.3218952091856937, "percentage": 66.44, "elapsed_time": "0:37:11", "remaining_time": "0:18:47", "throughput": 5554.79, "total_tokens": 12393472} +{"current_steps": 25175, "total_steps": 37885, "loss": 0.0, "lr": 6.109413123396374e-07, "epoch": 3.322555100963442, "percentage": 66.45, "elapsed_time": "0:37:11", "remaining_time": "0:18:46", "throughput": 5555.18, "total_tokens": 12396224} +{"current_steps": 25180, "total_steps": 37885, "loss": 0.0266, "lr": 6.105169525537805e-07, "epoch": 3.3232149927411903, "percentage": 66.46, "elapsed_time": "0:37:11", "remaining_time": "0:18:46", "throughput": 5555.44, "total_tokens": 12398656} +{"current_steps": 25185, "total_steps": 37885, "loss": 0.0, "lr": 6.100926754328797e-07, "epoch": 3.323874884518939, "percentage": 66.48, "elapsed_time": "0:37:12", "remaining_time": "0:18:45", "throughput": 5555.64, "total_tokens": 12400960} +{"current_steps": 25190, "total_steps": 37885, "loss": 0.0, "lr": 6.096684810669855e-07, "epoch": 3.3245347762966873, "percentage": 66.49, "elapsed_time": "0:37:12", "remaining_time": "0:18:45", "throughput": 5555.97, "total_tokens": 12403584} +{"current_steps": 25195, "total_steps": 37885, "loss": 0.0352, "lr": 6.092443695461289e-07, "epoch": 3.325194668074436, "percentage": 66.5, "elapsed_time": "0:37:12", "remaining_time": "0:18:44", "throughput": 5556.28, "total_tokens": 12406144} +{"current_steps": 25200, "total_steps": 37885, "loss": 0.0, "lr": 6.08820340960326e-07, "epoch": 3.3258545598521843, "percentage": 66.52, "elapsed_time": "0:37:13", "remaining_time": "0:18:44", "throughput": 5556.5, "total_tokens": 12408512} +{"current_steps": 25205, "total_steps": 37885, "loss": 0.0, "lr": 6.083963953995728e-07, "epoch": 3.3265144516299325, "percentage": 66.53, "elapsed_time": "0:37:13", "remaining_time": "0:18:43", "throughput": 5556.83, "total_tokens": 12411136} +{"current_steps": 25210, "total_steps": 37885, "loss": 0.0005, "lr": 6.079725329538486e-07, "epoch": 3.3271743434076813, "percentage": 66.54, "elapsed_time": "0:37:13", "remaining_time": "0:18:43", "throughput": 5556.98, "total_tokens": 12413312} +{"current_steps": 25215, "total_steps": 37885, "loss": 0.028, "lr": 6.075487537131158e-07, "epoch": 3.3278342351854295, "percentage": 66.56, "elapsed_time": "0:37:14", "remaining_time": "0:18:42", "throughput": 5557.24, "total_tokens": 12415744} +{"current_steps": 25220, "total_steps": 37885, "loss": 0.0006, "lr": 6.071250577673179e-07, "epoch": 3.328494126963178, "percentage": 66.57, "elapsed_time": "0:37:14", "remaining_time": "0:18:42", "throughput": 5557.47, "total_tokens": 12418112} +{"current_steps": 25225, "total_steps": 37885, "loss": 0.0, "lr": 6.067014452063816e-07, "epoch": 3.3291540187409265, "percentage": 66.58, "elapsed_time": "0:37:14", "remaining_time": "0:18:41", "throughput": 5557.77, "total_tokens": 12420672} +{"current_steps": 25230, "total_steps": 37885, "loss": 0.0, "lr": 6.062779161202156e-07, "epoch": 3.329813910518675, "percentage": 66.6, "elapsed_time": "0:37:15", "remaining_time": "0:18:41", "throughput": 5557.91, "total_tokens": 12422848} +{"current_steps": 25235, "total_steps": 37885, "loss": 0.0, "lr": 6.058544705987105e-07, "epoch": 3.3304738022964235, "percentage": 66.61, "elapsed_time": "0:37:15", "remaining_time": "0:18:40", "throughput": 5558.17, "total_tokens": 12425280} +{"current_steps": 25240, "total_steps": 37885, "loss": 0.0366, "lr": 6.0543110873174e-07, "epoch": 3.331133694074172, "percentage": 66.62, "elapsed_time": "0:37:15", "remaining_time": "0:18:40", "throughput": 5558.43, "total_tokens": 12427712} +{"current_steps": 25245, "total_steps": 37885, "loss": 0.0, "lr": 6.050078306091595e-07, "epoch": 3.33179358585192, "percentage": 66.64, "elapsed_time": "0:37:16", "remaining_time": "0:18:39", "throughput": 5558.84, "total_tokens": 12430528} +{"current_steps": 25250, "total_steps": 37885, "loss": 0.0001, "lr": 6.045846363208066e-07, "epoch": 3.332453477629669, "percentage": 66.65, "elapsed_time": "0:37:16", "remaining_time": "0:18:39", "throughput": 5559.01, "total_tokens": 12432768} +{"current_steps": 25255, "total_steps": 37885, "loss": 0.0, "lr": 6.041615259565014e-07, "epoch": 3.333113369407417, "percentage": 66.66, "elapsed_time": "0:37:16", "remaining_time": "0:18:38", "throughput": 5559.34, "total_tokens": 12435392} +{"current_steps": 25260, "total_steps": 37885, "loss": 0.0, "lr": 6.037384996060455e-07, "epoch": 3.333773261185166, "percentage": 66.68, "elapsed_time": "0:37:17", "remaining_time": "0:18:38", "throughput": 5559.5, "total_tokens": 12437568} +{"current_steps": 25265, "total_steps": 37885, "loss": 0.0266, "lr": 6.033155573592239e-07, "epoch": 3.334433152962914, "percentage": 66.69, "elapsed_time": "0:37:17", "remaining_time": "0:18:37", "throughput": 5559.66, "total_tokens": 12439744} +{"current_steps": 25270, "total_steps": 37885, "loss": 0.0252, "lr": 6.028926993058026e-07, "epoch": 3.3350930447406624, "percentage": 66.7, "elapsed_time": "0:37:17", "remaining_time": "0:18:37", "throughput": 5559.87, "total_tokens": 12442048} +{"current_steps": 25275, "total_steps": 37885, "loss": 0.0001, "lr": 6.024699255355302e-07, "epoch": 3.335752936518411, "percentage": 66.72, "elapsed_time": "0:37:18", "remaining_time": "0:18:36", "throughput": 5560.35, "total_tokens": 12444992} +{"current_steps": 25280, "total_steps": 37885, "loss": 0.0002, "lr": 6.020472361381374e-07, "epoch": 3.3364128282961594, "percentage": 66.73, "elapsed_time": "0:37:18", "remaining_time": "0:18:36", "throughput": 5560.57, "total_tokens": 12447296} +{"current_steps": 25285, "total_steps": 37885, "loss": 0.0, "lr": 6.016246312033371e-07, "epoch": 3.337072720073908, "percentage": 66.74, "elapsed_time": "0:37:18", "remaining_time": "0:18:35", "throughput": 5560.91, "total_tokens": 12449920} +{"current_steps": 25290, "total_steps": 37885, "loss": 0.0294, "lr": 6.01202110820824e-07, "epoch": 3.3377326118516564, "percentage": 66.75, "elapsed_time": "0:37:19", "remaining_time": "0:18:35", "throughput": 5561.2, "total_tokens": 12452416} +{"current_steps": 25295, "total_steps": 37885, "loss": 0.0736, "lr": 6.007796750802748e-07, "epoch": 3.3383925036294047, "percentage": 66.77, "elapsed_time": "0:37:19", "remaining_time": "0:18:34", "throughput": 5561.45, "total_tokens": 12454784} +{"current_steps": 25300, "total_steps": 37885, "loss": 0.0, "lr": 6.003573240713489e-07, "epoch": 3.3390523954071534, "percentage": 66.78, "elapsed_time": "0:37:19", "remaining_time": "0:18:34", "throughput": 5561.85, "total_tokens": 12457536} +{"current_steps": 25305, "total_steps": 37885, "loss": 0.0004, "lr": 5.999350578836868e-07, "epoch": 3.3397122871849017, "percentage": 66.79, "elapsed_time": "0:37:20", "remaining_time": "0:18:33", "throughput": 5562.15, "total_tokens": 12460032} +{"current_steps": 25310, "total_steps": 37885, "loss": 0.0, "lr": 5.995128766069118e-07, "epoch": 3.3403721789626504, "percentage": 66.81, "elapsed_time": "0:37:20", "remaining_time": "0:18:33", "throughput": 5562.37, "total_tokens": 12462336} +{"current_steps": 25315, "total_steps": 37885, "loss": 0.0813, "lr": 5.990907803306286e-07, "epoch": 3.3410320707403987, "percentage": 66.82, "elapsed_time": "0:37:20", "remaining_time": "0:18:32", "throughput": 5562.72, "total_tokens": 12464960} +{"current_steps": 25320, "total_steps": 37885, "loss": 0.0001, "lr": 5.986687691444239e-07, "epoch": 3.341691962518147, "percentage": 66.83, "elapsed_time": "0:37:21", "remaining_time": "0:18:32", "throughput": 5563.1, "total_tokens": 12467648} +{"current_steps": 25325, "total_steps": 37885, "loss": 0.0, "lr": 5.98246843137867e-07, "epoch": 3.3423518542958957, "percentage": 66.85, "elapsed_time": "0:37:21", "remaining_time": "0:18:31", "throughput": 5563.4, "total_tokens": 12470144} +{"current_steps": 25330, "total_steps": 37885, "loss": 0.0001, "lr": 5.978250024005082e-07, "epoch": 3.343011746073644, "percentage": 66.86, "elapsed_time": "0:37:21", "remaining_time": "0:18:31", "throughput": 5563.64, "total_tokens": 12472512} +{"current_steps": 25335, "total_steps": 37885, "loss": 0.0, "lr": 5.974032470218804e-07, "epoch": 3.343671637851392, "percentage": 66.87, "elapsed_time": "0:37:22", "remaining_time": "0:18:30", "throughput": 5563.89, "total_tokens": 12474880} +{"current_steps": 25340, "total_steps": 37885, "loss": 0.0502, "lr": 5.969815770914983e-07, "epoch": 3.344331529629141, "percentage": 66.89, "elapsed_time": "0:37:22", "remaining_time": "0:18:30", "throughput": 5564.28, "total_tokens": 12477632} +{"current_steps": 25345, "total_steps": 37885, "loss": 0.0, "lr": 5.965599926988575e-07, "epoch": 3.344991421406889, "percentage": 66.9, "elapsed_time": "0:37:22", "remaining_time": "0:18:29", "throughput": 5564.81, "total_tokens": 12480704} +{"current_steps": 25350, "total_steps": 37885, "loss": 0.0003, "lr": 5.961384939334373e-07, "epoch": 3.3456513131846375, "percentage": 66.91, "elapsed_time": "0:37:23", "remaining_time": "0:18:29", "throughput": 5565.11, "total_tokens": 12483200} +{"current_steps": 25355, "total_steps": 37885, "loss": 0.0011, "lr": 5.957170808846968e-07, "epoch": 3.346311204962386, "percentage": 66.93, "elapsed_time": "0:37:23", "remaining_time": "0:18:28", "throughput": 5565.32, "total_tokens": 12485504} +{"current_steps": 25360, "total_steps": 37885, "loss": 0.0, "lr": 5.952957536420786e-07, "epoch": 3.3469710967401345, "percentage": 66.94, "elapsed_time": "0:37:23", "remaining_time": "0:18:28", "throughput": 5565.57, "total_tokens": 12487872} +{"current_steps": 25365, "total_steps": 37885, "loss": 0.0, "lr": 5.948745122950061e-07, "epoch": 3.347630988517883, "percentage": 66.95, "elapsed_time": "0:37:24", "remaining_time": "0:18:27", "throughput": 5565.71, "total_tokens": 12489984} +{"current_steps": 25370, "total_steps": 37885, "loss": 0.0, "lr": 5.944533569328841e-07, "epoch": 3.3482908802956315, "percentage": 66.97, "elapsed_time": "0:37:24", "remaining_time": "0:18:27", "throughput": 5566.03, "total_tokens": 12492544} +{"current_steps": 25375, "total_steps": 37885, "loss": 0.0267, "lr": 5.940322876451009e-07, "epoch": 3.3489507720733798, "percentage": 66.98, "elapsed_time": "0:37:24", "remaining_time": "0:18:26", "throughput": 5566.13, "total_tokens": 12494592} +{"current_steps": 25380, "total_steps": 37885, "loss": 0.0, "lr": 5.936113045210245e-07, "epoch": 3.3496106638511285, "percentage": 66.99, "elapsed_time": "0:37:25", "remaining_time": "0:18:26", "throughput": 5566.34, "total_tokens": 12496896} +{"current_steps": 25385, "total_steps": 37885, "loss": 0.1251, "lr": 5.931904076500062e-07, "epoch": 3.3502705556288768, "percentage": 67.01, "elapsed_time": "0:37:25", "remaining_time": "0:18:25", "throughput": 5566.74, "total_tokens": 12499648} +{"current_steps": 25390, "total_steps": 37885, "loss": 0.002, "lr": 5.927695971213781e-07, "epoch": 3.3509304474066255, "percentage": 67.02, "elapsed_time": "0:37:25", "remaining_time": "0:18:25", "throughput": 5567.09, "total_tokens": 12502272} +{"current_steps": 25395, "total_steps": 37885, "loss": 0.0, "lr": 5.923488730244537e-07, "epoch": 3.3515903391843738, "percentage": 67.03, "elapsed_time": "0:37:26", "remaining_time": "0:18:24", "throughput": 5567.46, "total_tokens": 12504960} +{"current_steps": 25400, "total_steps": 37885, "loss": 0.0413, "lr": 5.919282354485293e-07, "epoch": 3.352250230962122, "percentage": 67.05, "elapsed_time": "0:37:26", "remaining_time": "0:18:24", "throughput": 5567.76, "total_tokens": 12507456} +{"current_steps": 25405, "total_steps": 37885, "loss": 0.0002, "lr": 5.915076844828817e-07, "epoch": 3.3529101227398708, "percentage": 67.06, "elapsed_time": "0:37:26", "remaining_time": "0:18:23", "throughput": 5568.0, "total_tokens": 12509824} +{"current_steps": 25410, "total_steps": 37885, "loss": 0.0, "lr": 5.910872202167701e-07, "epoch": 3.353570014517619, "percentage": 67.07, "elapsed_time": "0:37:27", "remaining_time": "0:18:23", "throughput": 5568.4, "total_tokens": 12512576} +{"current_steps": 25415, "total_steps": 37885, "loss": 0.0, "lr": 5.90666842739435e-07, "epoch": 3.3542299062953678, "percentage": 67.08, "elapsed_time": "0:37:27", "remaining_time": "0:18:22", "throughput": 5568.69, "total_tokens": 12515072} +{"current_steps": 25420, "total_steps": 37885, "loss": 0.0025, "lr": 5.902465521400982e-07, "epoch": 3.354889798073116, "percentage": 67.1, "elapsed_time": "0:37:27", "remaining_time": "0:18:22", "throughput": 5569.01, "total_tokens": 12517632} +{"current_steps": 25425, "total_steps": 37885, "loss": 0.0032, "lr": 5.898263485079636e-07, "epoch": 3.3555496898508643, "percentage": 67.11, "elapsed_time": "0:37:28", "remaining_time": "0:18:21", "throughput": 5569.2, "total_tokens": 12519872} +{"current_steps": 25430, "total_steps": 37885, "loss": 0.0, "lr": 5.89406231932216e-07, "epoch": 3.356209581628613, "percentage": 67.12, "elapsed_time": "0:37:28", "remaining_time": "0:18:21", "throughput": 5569.47, "total_tokens": 12522304} +{"current_steps": 25435, "total_steps": 37885, "loss": 0.0922, "lr": 5.889862025020227e-07, "epoch": 3.3568694734063613, "percentage": 67.14, "elapsed_time": "0:37:28", "remaining_time": "0:18:20", "throughput": 5569.63, "total_tokens": 12524480} +{"current_steps": 25440, "total_steps": 37885, "loss": 0.0001, "lr": 5.885662603065316e-07, "epoch": 3.35752936518411, "percentage": 67.15, "elapsed_time": "0:37:29", "remaining_time": "0:18:20", "throughput": 5569.95, "total_tokens": 12527040} +{"current_steps": 25445, "total_steps": 37885, "loss": 0.0, "lr": 5.881464054348721e-07, "epoch": 3.3581892569618583, "percentage": 67.16, "elapsed_time": "0:37:29", "remaining_time": "0:18:19", "throughput": 5570.12, "total_tokens": 12529216} +{"current_steps": 25450, "total_steps": 37885, "loss": 0.0, "lr": 5.877266379761561e-07, "epoch": 3.3588491487396066, "percentage": 67.18, "elapsed_time": "0:37:29", "remaining_time": "0:18:19", "throughput": 5570.37, "total_tokens": 12531584} +{"current_steps": 25455, "total_steps": 37885, "loss": 0.0, "lr": 5.873069580194753e-07, "epoch": 3.3595090405173553, "percentage": 67.19, "elapsed_time": "0:37:30", "remaining_time": "0:18:18", "throughput": 5570.69, "total_tokens": 12534144} +{"current_steps": 25460, "total_steps": 37885, "loss": 0.0337, "lr": 5.868873656539044e-07, "epoch": 3.3601689322951036, "percentage": 67.2, "elapsed_time": "0:37:30", "remaining_time": "0:18:18", "throughput": 5570.85, "total_tokens": 12536320} +{"current_steps": 25465, "total_steps": 37885, "loss": 0.0002, "lr": 5.864678609684986e-07, "epoch": 3.360828824072852, "percentage": 67.22, "elapsed_time": "0:37:30", "remaining_time": "0:18:17", "throughput": 5571.07, "total_tokens": 12538624} +{"current_steps": 25470, "total_steps": 37885, "loss": 0.0007, "lr": 5.860484440522946e-07, "epoch": 3.3614887158506006, "percentage": 67.23, "elapsed_time": "0:37:30", "remaining_time": "0:18:17", "throughput": 5571.36, "total_tokens": 12541120} +{"current_steps": 25475, "total_steps": 37885, "loss": 0.1057, "lr": 5.856291149943109e-07, "epoch": 3.362148607628349, "percentage": 67.24, "elapsed_time": "0:37:31", "remaining_time": "0:18:16", "throughput": 5571.58, "total_tokens": 12543424} +{"current_steps": 25480, "total_steps": 37885, "loss": 0.0891, "lr": 5.852098738835467e-07, "epoch": 3.362808499406097, "percentage": 67.26, "elapsed_time": "0:37:31", "remaining_time": "0:18:16", "throughput": 5571.91, "total_tokens": 12545984} +{"current_steps": 25485, "total_steps": 37885, "loss": 0.0404, "lr": 5.847907208089834e-07, "epoch": 3.363468391183846, "percentage": 67.27, "elapsed_time": "0:37:31", "remaining_time": "0:18:15", "throughput": 5572.25, "total_tokens": 12548608} +{"current_steps": 25490, "total_steps": 37885, "loss": 0.0, "lr": 5.843716558595831e-07, "epoch": 3.364128282961594, "percentage": 67.28, "elapsed_time": "0:37:32", "remaining_time": "0:18:15", "throughput": 5572.54, "total_tokens": 12551104} +{"current_steps": 25495, "total_steps": 37885, "loss": 0.0276, "lr": 5.839526791242883e-07, "epoch": 3.364788174739343, "percentage": 67.3, "elapsed_time": "0:37:32", "remaining_time": "0:18:14", "throughput": 5572.84, "total_tokens": 12553600} +{"current_steps": 25500, "total_steps": 37885, "loss": 0.0009, "lr": 5.835337906920253e-07, "epoch": 3.365448066517091, "percentage": 67.31, "elapsed_time": "0:37:32", "remaining_time": "0:18:14", "throughput": 5573.1, "total_tokens": 12556032} +{"current_steps": 25505, "total_steps": 37885, "loss": 0.0394, "lr": 5.831149906516989e-07, "epoch": 3.3661079582948394, "percentage": 67.32, "elapsed_time": "0:37:33", "remaining_time": "0:18:13", "throughput": 5573.36, "total_tokens": 12558464} +{"current_steps": 25510, "total_steps": 37885, "loss": 0.0, "lr": 5.826962790921974e-07, "epoch": 3.366767850072588, "percentage": 67.34, "elapsed_time": "0:37:33", "remaining_time": "0:18:13", "throughput": 5573.55, "total_tokens": 12560704} +{"current_steps": 25515, "total_steps": 37885, "loss": 0.0337, "lr": 5.822776561023885e-07, "epoch": 3.3674277418503364, "percentage": 67.35, "elapsed_time": "0:37:33", "remaining_time": "0:18:12", "throughput": 5573.82, "total_tokens": 12563136} +{"current_steps": 25520, "total_steps": 37885, "loss": 0.0001, "lr": 5.81859121771122e-07, "epoch": 3.368087633628085, "percentage": 67.36, "elapsed_time": "0:37:34", "remaining_time": "0:18:12", "throughput": 5574.0, "total_tokens": 12565376} +{"current_steps": 25525, "total_steps": 37885, "loss": 0.0, "lr": 5.814406761872294e-07, "epoch": 3.3687475254058334, "percentage": 67.37, "elapsed_time": "0:37:34", "remaining_time": "0:18:11", "throughput": 5574.22, "total_tokens": 12567680} +{"current_steps": 25530, "total_steps": 37885, "loss": 0.0018, "lr": 5.810223194395221e-07, "epoch": 3.3694074171835817, "percentage": 67.39, "elapsed_time": "0:37:34", "remaining_time": "0:18:11", "throughput": 5574.51, "total_tokens": 12570176} +{"current_steps": 25535, "total_steps": 37885, "loss": 0.0631, "lr": 5.806040516167933e-07, "epoch": 3.3700673089613304, "percentage": 67.4, "elapsed_time": "0:37:35", "remaining_time": "0:18:10", "throughput": 5574.81, "total_tokens": 12572672} +{"current_steps": 25540, "total_steps": 37885, "loss": 0.0364, "lr": 5.801858728078179e-07, "epoch": 3.3707272007390787, "percentage": 67.41, "elapsed_time": "0:37:35", "remaining_time": "0:18:10", "throughput": 5575.15, "total_tokens": 12575296} +{"current_steps": 25545, "total_steps": 37885, "loss": 0.076, "lr": 5.797677831013506e-07, "epoch": 3.3713870925168274, "percentage": 67.43, "elapsed_time": "0:37:35", "remaining_time": "0:18:09", "throughput": 5575.47, "total_tokens": 12577856} +{"current_steps": 25550, "total_steps": 37885, "loss": 0.0607, "lr": 5.793497825861283e-07, "epoch": 3.3720469842945757, "percentage": 67.44, "elapsed_time": "0:37:36", "remaining_time": "0:18:09", "throughput": 5575.7, "total_tokens": 12580224} +{"current_steps": 25555, "total_steps": 37885, "loss": 0.055, "lr": 5.789318713508686e-07, "epoch": 3.372706876072324, "percentage": 67.45, "elapsed_time": "0:37:36", "remaining_time": "0:18:08", "throughput": 5575.89, "total_tokens": 12582464} +{"current_steps": 25560, "total_steps": 37885, "loss": 0.0001, "lr": 5.785140494842704e-07, "epoch": 3.3733667678500727, "percentage": 67.47, "elapsed_time": "0:37:36", "remaining_time": "0:18:08", "throughput": 5576.1, "total_tokens": 12584768} +{"current_steps": 25565, "total_steps": 37885, "loss": 0.0001, "lr": 5.780963170750129e-07, "epoch": 3.374026659627821, "percentage": 67.48, "elapsed_time": "0:37:37", "remaining_time": "0:18:07", "throughput": 5576.4, "total_tokens": 12587264} +{"current_steps": 25570, "total_steps": 37885, "loss": 0.0001, "lr": 5.776786742117564e-07, "epoch": 3.3746865514055697, "percentage": 67.49, "elapsed_time": "0:37:37", "remaining_time": "0:18:07", "throughput": 5576.68, "total_tokens": 12589760} +{"current_steps": 25575, "total_steps": 37885, "loss": 0.0678, "lr": 5.772611209831436e-07, "epoch": 3.375346443183318, "percentage": 67.51, "elapsed_time": "0:37:37", "remaining_time": "0:18:06", "throughput": 5576.99, "total_tokens": 12592320} +{"current_steps": 25580, "total_steps": 37885, "loss": 0.0014, "lr": 5.768436574777964e-07, "epoch": 3.3760063349610663, "percentage": 67.52, "elapsed_time": "0:37:38", "remaining_time": "0:18:06", "throughput": 5577.34, "total_tokens": 12594944} +{"current_steps": 25585, "total_steps": 37885, "loss": 0.0001, "lr": 5.764262837843186e-07, "epoch": 3.376666226738815, "percentage": 67.53, "elapsed_time": "0:37:38", "remaining_time": "0:18:05", "throughput": 5577.58, "total_tokens": 12597312} +{"current_steps": 25590, "total_steps": 37885, "loss": 0.0004, "lr": 5.760089999912947e-07, "epoch": 3.3773261185165633, "percentage": 67.55, "elapsed_time": "0:37:38", "remaining_time": "0:18:05", "throughput": 5577.85, "total_tokens": 12599744} +{"current_steps": 25595, "total_steps": 37885, "loss": 0.0473, "lr": 5.755918061872907e-07, "epoch": 3.3779860102943116, "percentage": 67.56, "elapsed_time": "0:37:39", "remaining_time": "0:18:04", "throughput": 5578.25, "total_tokens": 12602496} +{"current_steps": 25600, "total_steps": 37885, "loss": 0.0001, "lr": 5.751747024608527e-07, "epoch": 3.3786459020720603, "percentage": 67.57, "elapsed_time": "0:37:39", "remaining_time": "0:18:04", "throughput": 5578.44, "total_tokens": 12604736} +{"current_steps": 25605, "total_steps": 37885, "loss": 0.0474, "lr": 5.747576889005068e-07, "epoch": 3.3793057938498086, "percentage": 67.59, "elapsed_time": "0:37:39", "remaining_time": "0:18:03", "throughput": 5578.7, "total_tokens": 12607168} +{"current_steps": 25610, "total_steps": 37885, "loss": 0.0322, "lr": 5.743407655947627e-07, "epoch": 3.379965685627557, "percentage": 67.6, "elapsed_time": "0:37:40", "remaining_time": "0:18:03", "throughput": 5579.09, "total_tokens": 12609920} +{"current_steps": 25615, "total_steps": 37885, "loss": 0.0329, "lr": 5.739239326321086e-07, "epoch": 3.3806255774053056, "percentage": 67.61, "elapsed_time": "0:37:40", "remaining_time": "0:18:02", "throughput": 5579.28, "total_tokens": 12612160} +{"current_steps": 25620, "total_steps": 37885, "loss": 0.0, "lr": 5.735071901010146e-07, "epoch": 3.381285469183054, "percentage": 67.63, "elapsed_time": "0:37:40", "remaining_time": "0:18:02", "throughput": 5579.59, "total_tokens": 12614720} +{"current_steps": 25625, "total_steps": 37885, "loss": 0.0001, "lr": 5.730905380899309e-07, "epoch": 3.3819453609608026, "percentage": 67.64, "elapsed_time": "0:37:41", "remaining_time": "0:18:01", "throughput": 5579.98, "total_tokens": 12617472} +{"current_steps": 25630, "total_steps": 37885, "loss": 0.0, "lr": 5.72673976687289e-07, "epoch": 3.382605252738551, "percentage": 67.65, "elapsed_time": "0:37:41", "remaining_time": "0:18:01", "throughput": 5580.27, "total_tokens": 12619968} +{"current_steps": 25635, "total_steps": 37885, "loss": 0.0022, "lr": 5.722575059815014e-07, "epoch": 3.383265144516299, "percentage": 67.67, "elapsed_time": "0:37:41", "remaining_time": "0:18:00", "throughput": 5580.49, "total_tokens": 12622272} +{"current_steps": 25640, "total_steps": 37885, "loss": 0.0005, "lr": 5.718411260609599e-07, "epoch": 3.383925036294048, "percentage": 67.68, "elapsed_time": "0:37:42", "remaining_time": "0:18:00", "throughput": 5580.7, "total_tokens": 12624576} +{"current_steps": 25645, "total_steps": 37885, "loss": 0.0251, "lr": 5.714248370140397e-07, "epoch": 3.384584928071796, "percentage": 67.69, "elapsed_time": "0:37:42", "remaining_time": "0:17:59", "throughput": 5581.06, "total_tokens": 12627264} +{"current_steps": 25650, "total_steps": 37885, "loss": 0.004, "lr": 5.710086389290945e-07, "epoch": 3.385244819849545, "percentage": 67.7, "elapsed_time": "0:37:42", "remaining_time": "0:17:59", "throughput": 5581.43, "total_tokens": 12629952} +{"current_steps": 25655, "total_steps": 37885, "loss": 0.0, "lr": 5.705925318944585e-07, "epoch": 3.385904711627293, "percentage": 67.72, "elapsed_time": "0:37:43", "remaining_time": "0:17:58", "throughput": 5581.85, "total_tokens": 12632768} +{"current_steps": 25660, "total_steps": 37885, "loss": 0.0, "lr": 5.701765159984483e-07, "epoch": 3.3865646034050414, "percentage": 67.73, "elapsed_time": "0:37:43", "remaining_time": "0:17:58", "throughput": 5582.04, "total_tokens": 12635008} +{"current_steps": 25665, "total_steps": 37885, "loss": 0.0, "lr": 5.6976059132936e-07, "epoch": 3.38722449518279, "percentage": 67.74, "elapsed_time": "0:37:43", "remaining_time": "0:17:57", "throughput": 5582.36, "total_tokens": 12637568} +{"current_steps": 25670, "total_steps": 37885, "loss": 0.0675, "lr": 5.69344757975471e-07, "epoch": 3.3878843869605384, "percentage": 67.76, "elapsed_time": "0:37:44", "remaining_time": "0:17:57", "throughput": 5582.7, "total_tokens": 12640192} +{"current_steps": 25675, "total_steps": 37885, "loss": 0.1155, "lr": 5.689290160250382e-07, "epoch": 3.388544278738287, "percentage": 67.77, "elapsed_time": "0:37:44", "remaining_time": "0:17:56", "throughput": 5582.99, "total_tokens": 12642688} +{"current_steps": 25680, "total_steps": 37885, "loss": 0.0025, "lr": 5.685133655663001e-07, "epoch": 3.3892041705160354, "percentage": 67.78, "elapsed_time": "0:37:44", "remaining_time": "0:17:56", "throughput": 5583.35, "total_tokens": 12645376} +{"current_steps": 25685, "total_steps": 37885, "loss": 0.1427, "lr": 5.68097806687476e-07, "epoch": 3.3898640622937837, "percentage": 67.8, "elapsed_time": "0:37:45", "remaining_time": "0:17:55", "throughput": 5583.7, "total_tokens": 12648000} +{"current_steps": 25690, "total_steps": 37885, "loss": 0.0001, "lr": 5.676823394767644e-07, "epoch": 3.3905239540715324, "percentage": 67.81, "elapsed_time": "0:37:45", "remaining_time": "0:17:55", "throughput": 5584.12, "total_tokens": 12650816} +{"current_steps": 25695, "total_steps": 37885, "loss": 0.0044, "lr": 5.672669640223458e-07, "epoch": 3.3911838458492807, "percentage": 67.82, "elapsed_time": "0:37:45", "remaining_time": "0:17:54", "throughput": 5584.43, "total_tokens": 12653376} +{"current_steps": 25700, "total_steps": 37885, "loss": 0.0, "lr": 5.668516804123808e-07, "epoch": 3.3918437376270294, "percentage": 67.84, "elapsed_time": "0:37:46", "remaining_time": "0:17:54", "throughput": 5584.77, "total_tokens": 12656000} +{"current_steps": 25705, "total_steps": 37885, "loss": 0.0018, "lr": 5.664364887350097e-07, "epoch": 3.3925036294047777, "percentage": 67.85, "elapsed_time": "0:37:46", "remaining_time": "0:17:53", "throughput": 5585.07, "total_tokens": 12658496} +{"current_steps": 25710, "total_steps": 37885, "loss": 0.0, "lr": 5.660213890783541e-07, "epoch": 3.393163521182526, "percentage": 67.86, "elapsed_time": "0:37:46", "remaining_time": "0:17:53", "throughput": 5585.31, "total_tokens": 12660864} +{"current_steps": 25715, "total_steps": 37885, "loss": 0.0, "lr": 5.656063815305161e-07, "epoch": 3.3938234129602747, "percentage": 67.88, "elapsed_time": "0:37:47", "remaining_time": "0:17:52", "throughput": 5585.62, "total_tokens": 12663424} +{"current_steps": 25720, "total_steps": 37885, "loss": 0.0, "lr": 5.651914661795785e-07, "epoch": 3.394483304738023, "percentage": 67.89, "elapsed_time": "0:37:47", "remaining_time": "0:17:52", "throughput": 5585.89, "total_tokens": 12665856} +{"current_steps": 25725, "total_steps": 37885, "loss": 0.0001, "lr": 5.64776643113603e-07, "epoch": 3.3951431965157712, "percentage": 67.9, "elapsed_time": "0:37:47", "remaining_time": "0:17:51", "throughput": 5586.18, "total_tokens": 12668352} +{"current_steps": 25730, "total_steps": 37885, "loss": 0.0, "lr": 5.643619124206333e-07, "epoch": 3.39580308829352, "percentage": 67.92, "elapsed_time": "0:37:48", "remaining_time": "0:17:51", "throughput": 5586.37, "total_tokens": 12670592} +{"current_steps": 25735, "total_steps": 37885, "loss": 0.0518, "lr": 5.639472741886937e-07, "epoch": 3.3964629800712682, "percentage": 67.93, "elapsed_time": "0:37:48", "remaining_time": "0:17:50", "throughput": 5586.64, "total_tokens": 12673024} +{"current_steps": 25740, "total_steps": 37885, "loss": 0.0003, "lr": 5.635327285057869e-07, "epoch": 3.3971228718490165, "percentage": 67.94, "elapsed_time": "0:37:48", "remaining_time": "0:17:50", "throughput": 5586.9, "total_tokens": 12675456} +{"current_steps": 25745, "total_steps": 37885, "loss": 0.0, "lr": 5.63118275459898e-07, "epoch": 3.3977827636267652, "percentage": 67.96, "elapsed_time": "0:37:49", "remaining_time": "0:17:49", "throughput": 5587.24, "total_tokens": 12678080} +{"current_steps": 25750, "total_steps": 37885, "loss": 0.0595, "lr": 5.627039151389917e-07, "epoch": 3.3984426554045135, "percentage": 67.97, "elapsed_time": "0:37:49", "remaining_time": "0:17:49", "throughput": 5587.43, "total_tokens": 12680320} +{"current_steps": 25755, "total_steps": 37885, "loss": 0.0, "lr": 5.622896476310125e-07, "epoch": 3.3991025471822622, "percentage": 67.98, "elapsed_time": "0:37:49", "remaining_time": "0:17:49", "throughput": 5587.63, "total_tokens": 12682624} +{"current_steps": 25760, "total_steps": 37885, "loss": 0.0, "lr": 5.618754730238863e-07, "epoch": 3.3997624389600105, "percentage": 68.0, "elapsed_time": "0:37:50", "remaining_time": "0:17:48", "throughput": 5587.92, "total_tokens": 12685120} +{"current_steps": 25765, "total_steps": 37885, "loss": 0.2906, "lr": 5.614613914055175e-07, "epoch": 3.400422330737759, "percentage": 68.01, "elapsed_time": "0:37:50", "remaining_time": "0:17:48", "throughput": 5588.28, "total_tokens": 12687808} +{"current_steps": 25770, "total_steps": 37885, "loss": 0.0, "lr": 5.610474028637935e-07, "epoch": 3.4010822225155075, "percentage": 68.02, "elapsed_time": "0:37:50", "remaining_time": "0:17:47", "throughput": 5588.54, "total_tokens": 12690240} +{"current_steps": 25775, "total_steps": 37885, "loss": 0.0, "lr": 5.606335074865795e-07, "epoch": 3.401742114293256, "percentage": 68.03, "elapsed_time": "0:37:51", "remaining_time": "0:17:47", "throughput": 5588.76, "total_tokens": 12692544} +{"current_steps": 25780, "total_steps": 37885, "loss": 0.0, "lr": 5.602197053617214e-07, "epoch": 3.4024020060710045, "percentage": 68.05, "elapsed_time": "0:37:51", "remaining_time": "0:17:46", "throughput": 5589.07, "total_tokens": 12695104} +{"current_steps": 25785, "total_steps": 37885, "loss": 0.0, "lr": 5.598059965770468e-07, "epoch": 3.403061897848753, "percentage": 68.06, "elapsed_time": "0:37:51", "remaining_time": "0:17:46", "throughput": 5589.33, "total_tokens": 12697536} +{"current_steps": 25790, "total_steps": 37885, "loss": 0.0002, "lr": 5.593923812203613e-07, "epoch": 3.403721789626501, "percentage": 68.07, "elapsed_time": "0:37:52", "remaining_time": "0:17:45", "throughput": 5589.55, "total_tokens": 12699840} +{"current_steps": 25795, "total_steps": 37885, "loss": 0.0, "lr": 5.589788593794529e-07, "epoch": 3.40438168140425, "percentage": 68.09, "elapsed_time": "0:37:52", "remaining_time": "0:17:45", "throughput": 5589.82, "total_tokens": 12702336} +{"current_steps": 25800, "total_steps": 37885, "loss": 0.0, "lr": 5.585654311420873e-07, "epoch": 3.405041573181998, "percentage": 68.1, "elapsed_time": "0:37:52", "remaining_time": "0:17:44", "throughput": 5590.03, "total_tokens": 12704640} +{"current_steps": 25805, "total_steps": 37885, "loss": 0.0001, "lr": 5.581520965960125e-07, "epoch": 3.405701464959747, "percentage": 68.11, "elapsed_time": "0:37:53", "remaining_time": "0:17:44", "throughput": 5590.37, "total_tokens": 12707264} +{"current_steps": 25810, "total_steps": 37885, "loss": 0.028, "lr": 5.57738855828956e-07, "epoch": 3.406361356737495, "percentage": 68.13, "elapsed_time": "0:37:53", "remaining_time": "0:17:43", "throughput": 5590.7, "total_tokens": 12709888} +{"current_steps": 25815, "total_steps": 37885, "loss": 0.0, "lr": 5.573257089286243e-07, "epoch": 3.4070212485152434, "percentage": 68.14, "elapsed_time": "0:37:53", "remaining_time": "0:17:43", "throughput": 5590.91, "total_tokens": 12712192} +{"current_steps": 25820, "total_steps": 37885, "loss": 0.0454, "lr": 5.569126559827053e-07, "epoch": 3.407681140292992, "percentage": 68.15, "elapsed_time": "0:37:54", "remaining_time": "0:17:42", "throughput": 5591.15, "total_tokens": 12714560} +{"current_steps": 25825, "total_steps": 37885, "loss": 0.0, "lr": 5.564996970788667e-07, "epoch": 3.4083410320707404, "percentage": 68.17, "elapsed_time": "0:37:54", "remaining_time": "0:17:42", "throughput": 5591.44, "total_tokens": 12717056} +{"current_steps": 25830, "total_steps": 37885, "loss": 0.0059, "lr": 5.560868323047556e-07, "epoch": 3.409000923848489, "percentage": 68.18, "elapsed_time": "0:37:54", "remaining_time": "0:17:41", "throughput": 5591.73, "total_tokens": 12719552} +{"current_steps": 25835, "total_steps": 37885, "loss": 0.0, "lr": 5.556740617479998e-07, "epoch": 3.4096608156262374, "percentage": 68.19, "elapsed_time": "0:37:55", "remaining_time": "0:17:41", "throughput": 5591.96, "total_tokens": 12721920} +{"current_steps": 25840, "total_steps": 37885, "loss": 0.0352, "lr": 5.552613854962067e-07, "epoch": 3.4103207074039856, "percentage": 68.21, "elapsed_time": "0:37:55", "remaining_time": "0:17:40", "throughput": 5592.32, "total_tokens": 12724608} +{"current_steps": 25845, "total_steps": 37885, "loss": 0.0472, "lr": 5.548488036369645e-07, "epoch": 3.4109805991817344, "percentage": 68.22, "elapsed_time": "0:37:55", "remaining_time": "0:17:40", "throughput": 5592.63, "total_tokens": 12727168} +{"current_steps": 25850, "total_steps": 37885, "loss": 0.0, "lr": 5.544363162578399e-07, "epoch": 3.4116404909594826, "percentage": 68.23, "elapsed_time": "0:37:56", "remaining_time": "0:17:39", "throughput": 5592.81, "total_tokens": 12729408} +{"current_steps": 25855, "total_steps": 37885, "loss": 0.0, "lr": 5.540239234463804e-07, "epoch": 3.412300382737231, "percentage": 68.25, "elapsed_time": "0:37:56", "remaining_time": "0:17:39", "throughput": 5593.2, "total_tokens": 12732160} +{"current_steps": 25860, "total_steps": 37885, "loss": 0.0, "lr": 5.536116252901142e-07, "epoch": 3.4129602745149796, "percentage": 68.26, "elapsed_time": "0:37:56", "remaining_time": "0:17:38", "throughput": 5593.48, "total_tokens": 12734656} +{"current_steps": 25865, "total_steps": 37885, "loss": 0.0213, "lr": 5.531994218765477e-07, "epoch": 3.413620166292728, "percentage": 68.27, "elapsed_time": "0:37:57", "remaining_time": "0:17:38", "throughput": 5593.78, "total_tokens": 12737216} +{"current_steps": 25870, "total_steps": 37885, "loss": 0.0, "lr": 5.527873132931682e-07, "epoch": 3.4142800580704766, "percentage": 68.29, "elapsed_time": "0:37:57", "remaining_time": "0:17:37", "throughput": 5594.17, "total_tokens": 12739968} +{"current_steps": 25875, "total_steps": 37885, "loss": 0.0366, "lr": 5.523752996274435e-07, "epoch": 3.414939949848225, "percentage": 68.3, "elapsed_time": "0:37:57", "remaining_time": "0:17:37", "throughput": 5594.35, "total_tokens": 12742208} +{"current_steps": 25880, "total_steps": 37885, "loss": 0.0003, "lr": 5.519633809668197e-07, "epoch": 3.415599841625973, "percentage": 68.31, "elapsed_time": "0:37:58", "remaining_time": "0:17:36", "throughput": 5594.62, "total_tokens": 12744640} +{"current_steps": 25885, "total_steps": 37885, "loss": 0.0007, "lr": 5.515515573987238e-07, "epoch": 3.416259733403722, "percentage": 68.33, "elapsed_time": "0:37:58", "remaining_time": "0:17:36", "throughput": 5594.85, "total_tokens": 12747008} +{"current_steps": 25890, "total_steps": 37885, "loss": 0.1469, "lr": 5.511398290105625e-07, "epoch": 3.41691962518147, "percentage": 68.34, "elapsed_time": "0:37:58", "remaining_time": "0:17:35", "throughput": 5595.16, "total_tokens": 12749568} +{"current_steps": 25895, "total_steps": 37885, "loss": 0.0004, "lr": 5.507281958897224e-07, "epoch": 3.4175795169592185, "percentage": 68.35, "elapsed_time": "0:37:59", "remaining_time": "0:17:35", "throughput": 5595.47, "total_tokens": 12752128} +{"current_steps": 25900, "total_steps": 37885, "loss": 0.0002, "lr": 5.503166581235694e-07, "epoch": 3.418239408736967, "percentage": 68.36, "elapsed_time": "0:37:59", "remaining_time": "0:17:34", "throughput": 5595.73, "total_tokens": 12754560} +{"current_steps": 25905, "total_steps": 37885, "loss": 0.0938, "lr": 5.499052157994486e-07, "epoch": 3.4188993005147155, "percentage": 68.38, "elapsed_time": "0:37:59", "remaining_time": "0:17:34", "throughput": 5596.1, "total_tokens": 12757248} +{"current_steps": 25910, "total_steps": 37885, "loss": 0.0352, "lr": 5.49493869004687e-07, "epoch": 3.419559192292464, "percentage": 68.39, "elapsed_time": "0:38:00", "remaining_time": "0:17:33", "throughput": 5596.43, "total_tokens": 12759872} +{"current_steps": 25915, "total_steps": 37885, "loss": 0.0001, "lr": 5.490826178265893e-07, "epoch": 3.4202190840702125, "percentage": 68.4, "elapsed_time": "0:38:00", "remaining_time": "0:17:33", "throughput": 5596.66, "total_tokens": 12762240} +{"current_steps": 25920, "total_steps": 37885, "loss": 0.0026, "lr": 5.486714623524405e-07, "epoch": 3.4208789758479607, "percentage": 68.42, "elapsed_time": "0:38:00", "remaining_time": "0:17:32", "throughput": 5596.86, "total_tokens": 12764544} +{"current_steps": 25925, "total_steps": 37885, "loss": 0.0, "lr": 5.482604026695057e-07, "epoch": 3.4215388676257095, "percentage": 68.43, "elapsed_time": "0:38:00", "remaining_time": "0:17:32", "throughput": 5597.25, "total_tokens": 12767296} +{"current_steps": 25930, "total_steps": 37885, "loss": 0.1487, "lr": 5.478494388650295e-07, "epoch": 3.4221987594034577, "percentage": 68.44, "elapsed_time": "0:38:01", "remaining_time": "0:17:31", "throughput": 5597.59, "total_tokens": 12769920} +{"current_steps": 25935, "total_steps": 37885, "loss": 0.0041, "lr": 5.474385710262357e-07, "epoch": 3.4228586511812065, "percentage": 68.46, "elapsed_time": "0:38:01", "remaining_time": "0:17:31", "throughput": 5597.87, "total_tokens": 12772416} +{"current_steps": 25940, "total_steps": 37885, "loss": 0.0268, "lr": 5.470277992403271e-07, "epoch": 3.4235185429589547, "percentage": 68.47, "elapsed_time": "0:38:01", "remaining_time": "0:17:30", "throughput": 5598.09, "total_tokens": 12774720} +{"current_steps": 25945, "total_steps": 37885, "loss": 0.0657, "lr": 5.466171235944889e-07, "epoch": 3.424178434736703, "percentage": 68.48, "elapsed_time": "0:38:02", "remaining_time": "0:17:30", "throughput": 5598.35, "total_tokens": 12777152} +{"current_steps": 25950, "total_steps": 37885, "loss": 0.0, "lr": 5.462065441758826e-07, "epoch": 3.4248383265144517, "percentage": 68.5, "elapsed_time": "0:38:02", "remaining_time": "0:17:29", "throughput": 5598.68, "total_tokens": 12779776} +{"current_steps": 25955, "total_steps": 37885, "loss": 0.0984, "lr": 5.457960610716515e-07, "epoch": 3.4254982182922, "percentage": 68.51, "elapsed_time": "0:38:02", "remaining_time": "0:17:29", "throughput": 5598.96, "total_tokens": 12782272} +{"current_steps": 25960, "total_steps": 37885, "loss": 0.0387, "lr": 5.453856743689172e-07, "epoch": 3.4261581100699487, "percentage": 68.52, "elapsed_time": "0:38:03", "remaining_time": "0:17:28", "throughput": 5599.36, "total_tokens": 12785088} +{"current_steps": 25965, "total_steps": 37885, "loss": 0.0009, "lr": 5.449753841547811e-07, "epoch": 3.426818001847697, "percentage": 68.54, "elapsed_time": "0:38:03", "remaining_time": "0:17:28", "throughput": 5599.57, "total_tokens": 12787392} +{"current_steps": 25970, "total_steps": 37885, "loss": 0.0017, "lr": 5.445651905163253e-07, "epoch": 3.4274778936254453, "percentage": 68.55, "elapsed_time": "0:38:03", "remaining_time": "0:17:27", "throughput": 5599.88, "total_tokens": 12789952} +{"current_steps": 25975, "total_steps": 37885, "loss": 0.02, "lr": 5.441550935406091e-07, "epoch": 3.428137785403194, "percentage": 68.56, "elapsed_time": "0:38:04", "remaining_time": "0:17:27", "throughput": 5600.24, "total_tokens": 12792640} +{"current_steps": 25980, "total_steps": 37885, "loss": 0.0001, "lr": 5.43745093314674e-07, "epoch": 3.4287976771809423, "percentage": 68.58, "elapsed_time": "0:38:04", "remaining_time": "0:17:26", "throughput": 5600.53, "total_tokens": 12795136} +{"current_steps": 25985, "total_steps": 37885, "loss": 0.0, "lr": 5.433351899255389e-07, "epoch": 3.4294575689586906, "percentage": 68.59, "elapsed_time": "0:38:04", "remaining_time": "0:17:26", "throughput": 5600.9, "total_tokens": 12797824} +{"current_steps": 25990, "total_steps": 37885, "loss": 0.0013, "lr": 5.429253834602025e-07, "epoch": 3.4301174607364393, "percentage": 68.6, "elapsed_time": "0:38:05", "remaining_time": "0:17:25", "throughput": 5601.31, "total_tokens": 12800640} +{"current_steps": 25995, "total_steps": 37885, "loss": 0.0, "lr": 5.425156740056436e-07, "epoch": 3.4307773525141876, "percentage": 68.62, "elapsed_time": "0:38:05", "remaining_time": "0:17:25", "throughput": 5601.62, "total_tokens": 12803200} +{"current_steps": 26000, "total_steps": 37885, "loss": 0.0001, "lr": 5.4210606164882e-07, "epoch": 3.4314372442919363, "percentage": 68.63, "elapsed_time": "0:38:05", "remaining_time": "0:17:24", "throughput": 5601.91, "total_tokens": 12805696} +{"current_steps": 26005, "total_steps": 37885, "loss": 0.1064, "lr": 5.416965464766694e-07, "epoch": 3.4320971360696846, "percentage": 68.64, "elapsed_time": "0:38:06", "remaining_time": "0:17:24", "throughput": 5602.37, "total_tokens": 12808640} +{"current_steps": 26010, "total_steps": 37885, "loss": 0.0001, "lr": 5.412871285761076e-07, "epoch": 3.432757027847433, "percentage": 68.66, "elapsed_time": "0:38:06", "remaining_time": "0:17:23", "throughput": 5602.73, "total_tokens": 12811328} +{"current_steps": 26015, "total_steps": 37885, "loss": 0.0701, "lr": 5.408778080340311e-07, "epoch": 3.4334169196251816, "percentage": 68.67, "elapsed_time": "0:38:06", "remaining_time": "0:17:23", "throughput": 5603.14, "total_tokens": 12814144} +{"current_steps": 26020, "total_steps": 37885, "loss": 0.0001, "lr": 5.404685849373154e-07, "epoch": 3.43407681140293, "percentage": 68.68, "elapsed_time": "0:38:07", "remaining_time": "0:17:22", "throughput": 5603.37, "total_tokens": 12816512} +{"current_steps": 26025, "total_steps": 37885, "loss": 0.0001, "lr": 5.400594593728146e-07, "epoch": 3.434736703180678, "percentage": 68.69, "elapsed_time": "0:38:07", "remaining_time": "0:17:22", "throughput": 5603.55, "total_tokens": 12818752} +{"current_steps": 26030, "total_steps": 37885, "loss": 0.0, "lr": 5.396504314273629e-07, "epoch": 3.435396594958427, "percentage": 68.71, "elapsed_time": "0:38:07", "remaining_time": "0:17:22", "throughput": 5603.87, "total_tokens": 12821312} +{"current_steps": 26035, "total_steps": 37885, "loss": 0.0, "lr": 5.39241501187774e-07, "epoch": 3.436056486736175, "percentage": 68.72, "elapsed_time": "0:38:08", "remaining_time": "0:17:21", "throughput": 5604.18, "total_tokens": 12823872} +{"current_steps": 26040, "total_steps": 37885, "loss": 0.0004, "lr": 5.388326687408395e-07, "epoch": 3.436716378513924, "percentage": 68.73, "elapsed_time": "0:38:08", "remaining_time": "0:17:21", "throughput": 5604.54, "total_tokens": 12826560} +{"current_steps": 26045, "total_steps": 37885, "loss": 0.0, "lr": 5.384239341733314e-07, "epoch": 3.437376270291672, "percentage": 68.75, "elapsed_time": "0:38:08", "remaining_time": "0:17:20", "throughput": 5604.9, "total_tokens": 12829248} +{"current_steps": 26050, "total_steps": 37885, "loss": 0.0, "lr": 5.38015297572001e-07, "epoch": 3.4380361620694204, "percentage": 68.76, "elapsed_time": "0:38:09", "remaining_time": "0:17:20", "throughput": 5605.12, "total_tokens": 12831616} +{"current_steps": 26055, "total_steps": 37885, "loss": 0.0, "lr": 5.376067590235786e-07, "epoch": 3.438696053847169, "percentage": 68.77, "elapsed_time": "0:38:09", "remaining_time": "0:17:19", "throughput": 5605.46, "total_tokens": 12834240} +{"current_steps": 26060, "total_steps": 37885, "loss": 0.0, "lr": 5.371983186147729e-07, "epoch": 3.4393559456249174, "percentage": 68.79, "elapsed_time": "0:38:09", "remaining_time": "0:17:19", "throughput": 5605.81, "total_tokens": 12836928} +{"current_steps": 26065, "total_steps": 37885, "loss": 0.0016, "lr": 5.367899764322725e-07, "epoch": 3.440015837402666, "percentage": 68.8, "elapsed_time": "0:38:10", "remaining_time": "0:17:18", "throughput": 5606.25, "total_tokens": 12839808} +{"current_steps": 26070, "total_steps": 37885, "loss": 0.0673, "lr": 5.363817325627458e-07, "epoch": 3.4406757291804144, "percentage": 68.81, "elapsed_time": "0:38:10", "remaining_time": "0:17:18", "throughput": 5606.48, "total_tokens": 12842176} +{"current_steps": 26075, "total_steps": 37885, "loss": 0.0, "lr": 5.359735870928388e-07, "epoch": 3.4413356209581627, "percentage": 68.83, "elapsed_time": "0:38:10", "remaining_time": "0:17:17", "throughput": 5606.77, "total_tokens": 12844672} +{"current_steps": 26080, "total_steps": 37885, "loss": 0.0938, "lr": 5.355655401091776e-07, "epoch": 3.4419955127359114, "percentage": 68.84, "elapsed_time": "0:38:11", "remaining_time": "0:17:17", "throughput": 5606.98, "total_tokens": 12846976} +{"current_steps": 26085, "total_steps": 37885, "loss": 0.0001, "lr": 5.351575916983677e-07, "epoch": 3.4426554045136597, "percentage": 68.85, "elapsed_time": "0:38:11", "remaining_time": "0:17:16", "throughput": 5607.31, "total_tokens": 12849600} +{"current_steps": 26090, "total_steps": 37885, "loss": 0.0457, "lr": 5.347497419469926e-07, "epoch": 3.4433152962914084, "percentage": 68.87, "elapsed_time": "0:38:11", "remaining_time": "0:17:16", "throughput": 5607.46, "total_tokens": 12851776} +{"current_steps": 26095, "total_steps": 37885, "loss": 0.0, "lr": 5.34341990941616e-07, "epoch": 3.4439751880691567, "percentage": 68.88, "elapsed_time": "0:38:12", "remaining_time": "0:17:15", "throughput": 5607.7, "total_tokens": 12854144} +{"current_steps": 26100, "total_steps": 37885, "loss": 0.0001, "lr": 5.33934338768779e-07, "epoch": 3.444635079846905, "percentage": 68.89, "elapsed_time": "0:38:12", "remaining_time": "0:17:15", "throughput": 5607.99, "total_tokens": 12856640} +{"current_steps": 26105, "total_steps": 37885, "loss": 0.0, "lr": 5.335267855150045e-07, "epoch": 3.4452949716246537, "percentage": 68.91, "elapsed_time": "0:38:12", "remaining_time": "0:17:14", "throughput": 5608.25, "total_tokens": 12859072} +{"current_steps": 26110, "total_steps": 37885, "loss": 0.0, "lr": 5.331193312667916e-07, "epoch": 3.445954863402402, "percentage": 68.92, "elapsed_time": "0:38:13", "remaining_time": "0:17:14", "throughput": 5608.64, "total_tokens": 12861824} +{"current_steps": 26115, "total_steps": 37885, "loss": 0.0298, "lr": 5.327119761106193e-07, "epoch": 3.4466147551801507, "percentage": 68.93, "elapsed_time": "0:38:13", "remaining_time": "0:17:13", "throughput": 5608.82, "total_tokens": 12864064} +{"current_steps": 26120, "total_steps": 37885, "loss": 0.0036, "lr": 5.323047201329468e-07, "epoch": 3.447274646957899, "percentage": 68.95, "elapsed_time": "0:38:13", "remaining_time": "0:17:13", "throughput": 5609.16, "total_tokens": 12866688} +{"current_steps": 26125, "total_steps": 37885, "loss": 0.0441, "lr": 5.318975634202103e-07, "epoch": 3.4479345387356473, "percentage": 68.96, "elapsed_time": "0:38:14", "remaining_time": "0:17:12", "throughput": 5609.38, "total_tokens": 12869056} +{"current_steps": 26130, "total_steps": 37885, "loss": 0.0, "lr": 5.314905060588266e-07, "epoch": 3.448594430513396, "percentage": 68.97, "elapsed_time": "0:38:14", "remaining_time": "0:17:12", "throughput": 5609.74, "total_tokens": 12871744} +{"current_steps": 26135, "total_steps": 37885, "loss": 0.0, "lr": 5.310835481351901e-07, "epoch": 3.4492543222911443, "percentage": 68.99, "elapsed_time": "0:38:14", "remaining_time": "0:17:11", "throughput": 5610.0, "total_tokens": 12874176} +{"current_steps": 26140, "total_steps": 37885, "loss": 0.0, "lr": 5.306766897356747e-07, "epoch": 3.4499142140688925, "percentage": 69.0, "elapsed_time": "0:38:15", "remaining_time": "0:17:11", "throughput": 5610.28, "total_tokens": 12876672} +{"current_steps": 26145, "total_steps": 37885, "loss": 0.0013, "lr": 5.302699309466338e-07, "epoch": 3.4505741058466413, "percentage": 69.01, "elapsed_time": "0:38:15", "remaining_time": "0:17:10", "throughput": 5610.59, "total_tokens": 12879232} +{"current_steps": 26150, "total_steps": 37885, "loss": 0.072, "lr": 5.298632718543981e-07, "epoch": 3.4512339976243895, "percentage": 69.02, "elapsed_time": "0:38:15", "remaining_time": "0:17:10", "throughput": 5610.92, "total_tokens": 12881856} +{"current_steps": 26155, "total_steps": 37885, "loss": 0.0009, "lr": 5.294567125452785e-07, "epoch": 3.451893889402138, "percentage": 69.04, "elapsed_time": "0:38:16", "remaining_time": "0:17:09", "throughput": 5611.12, "total_tokens": 12884160} +{"current_steps": 26160, "total_steps": 37885, "loss": 0.0, "lr": 5.290502531055648e-07, "epoch": 3.4525537811798865, "percentage": 69.05, "elapsed_time": "0:38:16", "remaining_time": "0:17:09", "throughput": 5611.38, "total_tokens": 12886592} +{"current_steps": 26165, "total_steps": 37885, "loss": 0.0308, "lr": 5.286438936215239e-07, "epoch": 3.453213672957635, "percentage": 69.06, "elapsed_time": "0:38:16", "remaining_time": "0:17:08", "throughput": 5611.56, "total_tokens": 12888832} +{"current_steps": 26170, "total_steps": 37885, "loss": 0.0875, "lr": 5.282376341794033e-07, "epoch": 3.4538735647353835, "percentage": 69.08, "elapsed_time": "0:38:17", "remaining_time": "0:17:08", "throughput": 5611.79, "total_tokens": 12891200} +{"current_steps": 26175, "total_steps": 37885, "loss": 0.0, "lr": 5.278314748654287e-07, "epoch": 3.454533456513132, "percentage": 69.09, "elapsed_time": "0:38:17", "remaining_time": "0:17:07", "throughput": 5612.18, "total_tokens": 12893952} +{"current_steps": 26180, "total_steps": 37885, "loss": 0.0025, "lr": 5.274254157658048e-07, "epoch": 3.45519334829088, "percentage": 69.1, "elapsed_time": "0:38:17", "remaining_time": "0:17:07", "throughput": 5612.48, "total_tokens": 12896512} +{"current_steps": 26185, "total_steps": 37885, "loss": 0.0, "lr": 5.270194569667139e-07, "epoch": 3.455853240068629, "percentage": 69.12, "elapsed_time": "0:38:18", "remaining_time": "0:17:06", "throughput": 5612.71, "total_tokens": 12898880} +{"current_steps": 26190, "total_steps": 37885, "loss": 0.0949, "lr": 5.266135985543181e-07, "epoch": 3.456513131846377, "percentage": 69.13, "elapsed_time": "0:38:18", "remaining_time": "0:17:06", "throughput": 5613.09, "total_tokens": 12901632} +{"current_steps": 26195, "total_steps": 37885, "loss": 0.0, "lr": 5.262078406147585e-07, "epoch": 3.457173023624126, "percentage": 69.14, "elapsed_time": "0:38:18", "remaining_time": "0:17:05", "throughput": 5613.24, "total_tokens": 12903808} +{"current_steps": 26200, "total_steps": 37885, "loss": 0.0, "lr": 5.258021832341534e-07, "epoch": 3.457832915401874, "percentage": 69.16, "elapsed_time": "0:38:19", "remaining_time": "0:17:05", "throughput": 5613.45, "total_tokens": 12906112} +{"current_steps": 26205, "total_steps": 37885, "loss": 0.0007, "lr": 5.25396626498601e-07, "epoch": 3.4584928071796224, "percentage": 69.17, "elapsed_time": "0:38:19", "remaining_time": "0:17:04", "throughput": 5613.66, "total_tokens": 12908416} +{"current_steps": 26210, "total_steps": 37885, "loss": 0.0, "lr": 5.249911704941782e-07, "epoch": 3.459152698957371, "percentage": 69.18, "elapsed_time": "0:38:19", "remaining_time": "0:17:04", "throughput": 5613.91, "total_tokens": 12910848} +{"current_steps": 26215, "total_steps": 37885, "loss": 0.0, "lr": 5.245858153069394e-07, "epoch": 3.4598125907351194, "percentage": 69.2, "elapsed_time": "0:38:20", "remaining_time": "0:17:03", "throughput": 5614.21, "total_tokens": 12913408} +{"current_steps": 26220, "total_steps": 37885, "loss": 0.0016, "lr": 5.241805610229185e-07, "epoch": 3.460472482512868, "percentage": 69.21, "elapsed_time": "0:38:20", "remaining_time": "0:17:03", "throughput": 5614.54, "total_tokens": 12916032} +{"current_steps": 26225, "total_steps": 37885, "loss": 0.0252, "lr": 5.23775407728128e-07, "epoch": 3.4611323742906164, "percentage": 69.22, "elapsed_time": "0:38:20", "remaining_time": "0:17:02", "throughput": 5614.82, "total_tokens": 12918528} +{"current_steps": 26230, "total_steps": 37885, "loss": 0.0, "lr": 5.23370355508559e-07, "epoch": 3.4617922660683647, "percentage": 69.24, "elapsed_time": "0:38:21", "remaining_time": "0:17:02", "throughput": 5615.08, "total_tokens": 12920960} +{"current_steps": 26235, "total_steps": 37885, "loss": 0.0, "lr": 5.229654044501802e-07, "epoch": 3.4624521578461134, "percentage": 69.25, "elapsed_time": "0:38:21", "remaining_time": "0:17:01", "throughput": 5615.35, "total_tokens": 12923456} +{"current_steps": 26240, "total_steps": 37885, "loss": 0.0, "lr": 5.2256055463894e-07, "epoch": 3.4631120496238617, "percentage": 69.26, "elapsed_time": "0:38:21", "remaining_time": "0:17:01", "throughput": 5615.55, "total_tokens": 12925760} +{"current_steps": 26245, "total_steps": 37885, "loss": 0.0, "lr": 5.221558061607649e-07, "epoch": 3.4637719414016104, "percentage": 69.28, "elapsed_time": "0:38:22", "remaining_time": "0:17:01", "throughput": 5615.89, "total_tokens": 12928384} +{"current_steps": 26250, "total_steps": 37885, "loss": 0.0, "lr": 5.217511591015595e-07, "epoch": 3.4644318331793587, "percentage": 69.29, "elapsed_time": "0:38:22", "remaining_time": "0:17:00", "throughput": 5616.14, "total_tokens": 12930816} +{"current_steps": 26255, "total_steps": 37885, "loss": 0.0066, "lr": 5.213466135472072e-07, "epoch": 3.465091724957107, "percentage": 69.3, "elapsed_time": "0:38:22", "remaining_time": "0:17:00", "throughput": 5616.52, "total_tokens": 12933568} +{"current_steps": 26260, "total_steps": 37885, "loss": 0.0005, "lr": 5.209421695835701e-07, "epoch": 3.4657516167348557, "percentage": 69.32, "elapsed_time": "0:38:23", "remaining_time": "0:16:59", "throughput": 5616.72, "total_tokens": 12935872} +{"current_steps": 26265, "total_steps": 37885, "loss": 0.0, "lr": 5.205378272964889e-07, "epoch": 3.466411508512604, "percentage": 69.33, "elapsed_time": "0:38:23", "remaining_time": "0:16:59", "throughput": 5616.91, "total_tokens": 12938176} +{"current_steps": 26270, "total_steps": 37885, "loss": 0.0396, "lr": 5.201335867717818e-07, "epoch": 3.467071400290352, "percentage": 69.34, "elapsed_time": "0:38:23", "remaining_time": "0:16:58", "throughput": 5617.09, "total_tokens": 12940416} +{"current_steps": 26275, "total_steps": 37885, "loss": 0.0072, "lr": 5.197294480952452e-07, "epoch": 3.467731292068101, "percentage": 69.35, "elapsed_time": "0:38:24", "remaining_time": "0:16:58", "throughput": 5617.42, "total_tokens": 12943040} +{"current_steps": 26280, "total_steps": 37885, "loss": 0.0001, "lr": 5.193254113526561e-07, "epoch": 3.468391183845849, "percentage": 69.37, "elapsed_time": "0:38:24", "remaining_time": "0:16:57", "throughput": 5617.64, "total_tokens": 12945408} +{"current_steps": 26285, "total_steps": 37885, "loss": 0.0396, "lr": 5.189214766297675e-07, "epoch": 3.4690510756235975, "percentage": 69.38, "elapsed_time": "0:38:24", "remaining_time": "0:16:57", "throughput": 5617.89, "total_tokens": 12947840} +{"current_steps": 26290, "total_steps": 37885, "loss": 0.0, "lr": 5.18517644012312e-07, "epoch": 3.469710967401346, "percentage": 69.39, "elapsed_time": "0:38:25", "remaining_time": "0:16:56", "throughput": 5618.05, "total_tokens": 12950016} +{"current_steps": 26295, "total_steps": 37885, "loss": 0.0017, "lr": 5.181139135859996e-07, "epoch": 3.4703708591790945, "percentage": 69.41, "elapsed_time": "0:38:25", "remaining_time": "0:16:56", "throughput": 5618.45, "total_tokens": 12952832} +{"current_steps": 26300, "total_steps": 37885, "loss": 0.0648, "lr": 5.177102854365196e-07, "epoch": 3.471030750956843, "percentage": 69.42, "elapsed_time": "0:38:25", "remaining_time": "0:16:55", "throughput": 5618.67, "total_tokens": 12955200} +{"current_steps": 26305, "total_steps": 37885, "loss": 0.0, "lr": 5.173067596495393e-07, "epoch": 3.4716906427345915, "percentage": 69.43, "elapsed_time": "0:38:26", "remaining_time": "0:16:55", "throughput": 5618.9, "total_tokens": 12957568} +{"current_steps": 26310, "total_steps": 37885, "loss": 0.0, "lr": 5.16903336310703e-07, "epoch": 3.4723505345123398, "percentage": 69.45, "elapsed_time": "0:38:26", "remaining_time": "0:16:54", "throughput": 5619.16, "total_tokens": 12960000} +{"current_steps": 26315, "total_steps": 37885, "loss": 0.0, "lr": 5.165000155056363e-07, "epoch": 3.4730104262900885, "percentage": 69.46, "elapsed_time": "0:38:26", "remaining_time": "0:16:54", "throughput": 5619.39, "total_tokens": 12962368} +{"current_steps": 26320, "total_steps": 37885, "loss": 0.0007, "lr": 5.1609679731994e-07, "epoch": 3.4736703180678368, "percentage": 69.47, "elapsed_time": "0:38:27", "remaining_time": "0:16:53", "throughput": 5619.67, "total_tokens": 12964864} +{"current_steps": 26325, "total_steps": 37885, "loss": 0.147, "lr": 5.156936818391937e-07, "epoch": 3.4743302098455855, "percentage": 69.49, "elapsed_time": "0:38:27", "remaining_time": "0:16:53", "throughput": 5620.0, "total_tokens": 12967488} +{"current_steps": 26330, "total_steps": 37885, "loss": 0.0, "lr": 5.152906691489566e-07, "epoch": 3.4749901016233338, "percentage": 69.5, "elapsed_time": "0:38:27", "remaining_time": "0:16:52", "throughput": 5620.33, "total_tokens": 12970112} +{"current_steps": 26335, "total_steps": 37885, "loss": 0.0147, "lr": 5.148877593347649e-07, "epoch": 3.475649993401082, "percentage": 69.51, "elapsed_time": "0:38:28", "remaining_time": "0:16:52", "throughput": 5620.66, "total_tokens": 12972736} +{"current_steps": 26340, "total_steps": 37885, "loss": 0.0, "lr": 5.144849524821337e-07, "epoch": 3.4763098851788308, "percentage": 69.53, "elapsed_time": "0:38:28", "remaining_time": "0:16:51", "throughput": 5620.96, "total_tokens": 12975296} +{"current_steps": 26345, "total_steps": 37885, "loss": 0.0002, "lr": 5.140822486765552e-07, "epoch": 3.476969776956579, "percentage": 69.54, "elapsed_time": "0:38:28", "remaining_time": "0:16:51", "throughput": 5621.3, "total_tokens": 12977920} +{"current_steps": 26350, "total_steps": 37885, "loss": 0.0, "lr": 5.136796480035007e-07, "epoch": 3.4776296687343278, "percentage": 69.55, "elapsed_time": "0:38:29", "remaining_time": "0:16:50", "throughput": 5621.6, "total_tokens": 12980480} +{"current_steps": 26355, "total_steps": 37885, "loss": 0.0, "lr": 5.132771505484197e-07, "epoch": 3.478289560512076, "percentage": 69.57, "elapsed_time": "0:38:29", "remaining_time": "0:16:50", "throughput": 5621.95, "total_tokens": 12983168} +{"current_steps": 26360, "total_steps": 37885, "loss": 0.0004, "lr": 5.128747563967384e-07, "epoch": 3.4789494522898243, "percentage": 69.58, "elapsed_time": "0:38:29", "remaining_time": "0:16:49", "throughput": 5622.2, "total_tokens": 12985600} +{"current_steps": 26365, "total_steps": 37885, "loss": 0.0, "lr": 5.124724656338626e-07, "epoch": 3.479609344067573, "percentage": 69.59, "elapsed_time": "0:38:30", "remaining_time": "0:16:49", "throughput": 5622.6, "total_tokens": 12988416} +{"current_steps": 26370, "total_steps": 37885, "loss": 0.0, "lr": 5.12070278345176e-07, "epoch": 3.4802692358453213, "percentage": 69.61, "elapsed_time": "0:38:30", "remaining_time": "0:16:48", "throughput": 5622.78, "total_tokens": 12990656} +{"current_steps": 26375, "total_steps": 37885, "loss": 0.0, "lr": 5.116681946160391e-07, "epoch": 3.48092912762307, "percentage": 69.62, "elapsed_time": "0:38:30", "remaining_time": "0:16:48", "throughput": 5623.16, "total_tokens": 12993408} +{"current_steps": 26380, "total_steps": 37885, "loss": 0.0323, "lr": 5.112662145317917e-07, "epoch": 3.4815890194008183, "percentage": 69.63, "elapsed_time": "0:38:31", "remaining_time": "0:16:47", "throughput": 5623.28, "total_tokens": 12995520} +{"current_steps": 26385, "total_steps": 37885, "loss": 0.1026, "lr": 5.108643381777511e-07, "epoch": 3.4822489111785666, "percentage": 69.64, "elapsed_time": "0:38:31", "remaining_time": "0:16:47", "throughput": 5623.48, "total_tokens": 12997824} +{"current_steps": 26390, "total_steps": 37885, "loss": 0.0431, "lr": 5.104625656392132e-07, "epoch": 3.4829088029563153, "percentage": 69.66, "elapsed_time": "0:38:31", "remaining_time": "0:16:46", "throughput": 5623.69, "total_tokens": 13000128} +{"current_steps": 26395, "total_steps": 37885, "loss": 0.0, "lr": 5.100608970014501e-07, "epoch": 3.4835686947340636, "percentage": 69.67, "elapsed_time": "0:38:32", "remaining_time": "0:16:46", "throughput": 5623.97, "total_tokens": 13002624} +{"current_steps": 26400, "total_steps": 37885, "loss": 0.0002, "lr": 5.09659332349714e-07, "epoch": 3.484228586511812, "percentage": 69.68, "elapsed_time": "0:38:32", "remaining_time": "0:16:45", "throughput": 5624.23, "total_tokens": 13005120} +{"current_steps": 26405, "total_steps": 37885, "loss": 0.0, "lr": 5.092578717692341e-07, "epoch": 3.4848884782895606, "percentage": 69.7, "elapsed_time": "0:38:32", "remaining_time": "0:16:45", "throughput": 5624.59, "total_tokens": 13007808} +{"current_steps": 26410, "total_steps": 37885, "loss": 0.0523, "lr": 5.088565153452171e-07, "epoch": 3.485548370067309, "percentage": 69.71, "elapsed_time": "0:38:32", "remaining_time": "0:16:44", "throughput": 5624.77, "total_tokens": 13010048} +{"current_steps": 26415, "total_steps": 37885, "loss": 0.0, "lr": 5.084552631628479e-07, "epoch": 3.486208261845057, "percentage": 69.72, "elapsed_time": "0:38:33", "remaining_time": "0:16:44", "throughput": 5625.08, "total_tokens": 13012608} +{"current_steps": 26420, "total_steps": 37885, "loss": 0.0472, "lr": 5.080541153072902e-07, "epoch": 3.486868153622806, "percentage": 69.74, "elapsed_time": "0:38:33", "remaining_time": "0:16:44", "throughput": 5625.33, "total_tokens": 13015040} +{"current_steps": 26425, "total_steps": 37885, "loss": 0.0, "lr": 5.076530718636834e-07, "epoch": 3.487528045400554, "percentage": 69.75, "elapsed_time": "0:38:33", "remaining_time": "0:16:43", "throughput": 5625.48, "total_tokens": 13017216} +{"current_steps": 26430, "total_steps": 37885, "loss": 0.0001, "lr": 5.07252132917147e-07, "epoch": 3.488187937178303, "percentage": 69.76, "elapsed_time": "0:38:34", "remaining_time": "0:16:43", "throughput": 5625.73, "total_tokens": 13019648} +{"current_steps": 26435, "total_steps": 37885, "loss": 0.0309, "lr": 5.068512985527773e-07, "epoch": 3.488847828956051, "percentage": 69.78, "elapsed_time": "0:38:34", "remaining_time": "0:16:42", "throughput": 5625.89, "total_tokens": 13021824} +{"current_steps": 26440, "total_steps": 37885, "loss": 0.0554, "lr": 5.064505688556486e-07, "epoch": 3.4895077207337994, "percentage": 69.79, "elapsed_time": "0:38:34", "remaining_time": "0:16:42", "throughput": 5626.11, "total_tokens": 13024192} +{"current_steps": 26445, "total_steps": 37885, "loss": 0.0002, "lr": 5.060499439108127e-07, "epoch": 3.490167612511548, "percentage": 69.8, "elapsed_time": "0:38:35", "remaining_time": "0:16:41", "throughput": 5626.35, "total_tokens": 13026624} +{"current_steps": 26450, "total_steps": 37885, "loss": 0.0, "lr": 5.056494238032985e-07, "epoch": 3.4908275042892964, "percentage": 69.82, "elapsed_time": "0:38:35", "remaining_time": "0:16:41", "throughput": 5626.65, "total_tokens": 13029184} +{"current_steps": 26455, "total_steps": 37885, "loss": 0.0001, "lr": 5.052490086181151e-07, "epoch": 3.491487396067045, "percentage": 69.83, "elapsed_time": "0:38:35", "remaining_time": "0:16:40", "throughput": 5626.91, "total_tokens": 13031616} +{"current_steps": 26460, "total_steps": 37885, "loss": 0.0, "lr": 5.048486984402467e-07, "epoch": 3.4921472878447934, "percentage": 69.84, "elapsed_time": "0:38:36", "remaining_time": "0:16:40", "throughput": 5627.16, "total_tokens": 13034048} +{"current_steps": 26465, "total_steps": 37885, "loss": 0.0032, "lr": 5.044484933546565e-07, "epoch": 3.4928071796225417, "percentage": 69.86, "elapsed_time": "0:38:36", "remaining_time": "0:16:39", "throughput": 5627.41, "total_tokens": 13036480} +{"current_steps": 26470, "total_steps": 37885, "loss": 0.0, "lr": 5.040483934462849e-07, "epoch": 3.4934670714002904, "percentage": 69.87, "elapsed_time": "0:38:36", "remaining_time": "0:16:39", "throughput": 5627.74, "total_tokens": 13039104} +{"current_steps": 26475, "total_steps": 37885, "loss": 0.0004, "lr": 5.036483988000504e-07, "epoch": 3.4941269631780387, "percentage": 69.88, "elapsed_time": "0:38:37", "remaining_time": "0:16:38", "throughput": 5628.07, "total_tokens": 13041728} +{"current_steps": 26480, "total_steps": 37885, "loss": 0.0001, "lr": 5.032485095008494e-07, "epoch": 3.4947868549557874, "percentage": 69.9, "elapsed_time": "0:38:37", "remaining_time": "0:16:38", "throughput": 5628.38, "total_tokens": 13044288} +{"current_steps": 26485, "total_steps": 37885, "loss": 0.0, "lr": 5.028487256335541e-07, "epoch": 3.4954467467335357, "percentage": 69.91, "elapsed_time": "0:38:37", "remaining_time": "0:16:37", "throughput": 5628.66, "total_tokens": 13046784} +{"current_steps": 26490, "total_steps": 37885, "loss": 0.0, "lr": 5.024490472830176e-07, "epoch": 3.496106638511284, "percentage": 69.92, "elapsed_time": "0:38:38", "remaining_time": "0:16:37", "throughput": 5629.03, "total_tokens": 13049536} +{"current_steps": 26495, "total_steps": 37885, "loss": 0.0004, "lr": 5.020494745340677e-07, "epoch": 3.4967665302890327, "percentage": 69.94, "elapsed_time": "0:38:38", "remaining_time": "0:16:36", "throughput": 5629.29, "total_tokens": 13051968} +{"current_steps": 26500, "total_steps": 37885, "loss": 0.0899, "lr": 5.016500074715108e-07, "epoch": 3.497426422066781, "percentage": 69.95, "elapsed_time": "0:38:38", "remaining_time": "0:16:36", "throughput": 5629.61, "total_tokens": 13054592} +{"current_steps": 26505, "total_steps": 37885, "loss": 0.0003, "lr": 5.01250646180131e-07, "epoch": 3.4980863138445297, "percentage": 69.96, "elapsed_time": "0:38:39", "remaining_time": "0:16:35", "throughput": 5629.89, "total_tokens": 13057088} +{"current_steps": 26510, "total_steps": 37885, "loss": 0.0, "lr": 5.008513907446898e-07, "epoch": 3.498746205622278, "percentage": 69.97, "elapsed_time": "0:38:39", "remaining_time": "0:16:35", "throughput": 5630.22, "total_tokens": 13059712} +{"current_steps": 26515, "total_steps": 37885, "loss": 0.0688, "lr": 5.004522412499267e-07, "epoch": 3.4994060974000263, "percentage": 69.99, "elapsed_time": "0:38:40", "remaining_time": "0:16:34", "throughput": 5629.9, "total_tokens": 13062336} +{"current_steps": 26520, "total_steps": 37885, "loss": 0.0, "lr": 5.000531977805575e-07, "epoch": 3.500065989177775, "percentage": 70.0, "elapsed_time": "0:38:40", "remaining_time": "0:16:34", "throughput": 5630.1, "total_tokens": 13064640} +{"current_steps": 26525, "total_steps": 37885, "loss": 0.0441, "lr": 4.99654260421277e-07, "epoch": 3.5007258809555233, "percentage": 70.01, "elapsed_time": "0:38:40", "remaining_time": "0:16:33", "throughput": 5630.45, "total_tokens": 13067328} +{"current_steps": 26530, "total_steps": 37885, "loss": 0.0, "lr": 4.992554292567568e-07, "epoch": 3.501385772733272, "percentage": 70.03, "elapsed_time": "0:38:41", "remaining_time": "0:16:33", "throughput": 5630.72, "total_tokens": 13069824} +{"current_steps": 26530, "total_steps": 37885, "eval_loss": 0.18093986809253693, "epoch": 3.501385772733272, "percentage": 70.03, "elapsed_time": "0:38:49", "remaining_time": "0:16:36", "throughput": 5611.57, "total_tokens": 13069824} +{"current_steps": 26535, "total_steps": 37885, "loss": 0.0002, "lr": 4.988567043716452e-07, "epoch": 3.5020456645110203, "percentage": 70.04, "elapsed_time": "0:39:23", "remaining_time": "0:16:50", "throughput": 5531.44, "total_tokens": 13072000} +{"current_steps": 26540, "total_steps": 37885, "loss": 0.0002, "lr": 4.984580858505691e-07, "epoch": 3.5027055562887686, "percentage": 70.05, "elapsed_time": "0:39:23", "remaining_time": "0:16:50", "throughput": 5531.64, "total_tokens": 13074304} +{"current_steps": 26545, "total_steps": 37885, "loss": 0.0001, "lr": 4.980595737781328e-07, "epoch": 3.503365448066517, "percentage": 70.07, "elapsed_time": "0:39:23", "remaining_time": "0:16:49", "throughput": 5531.97, "total_tokens": 13076928} +{"current_steps": 26550, "total_steps": 37885, "loss": 0.0012, "lr": 4.976611682389168e-07, "epoch": 3.5040253398442656, "percentage": 70.08, "elapsed_time": "0:39:24", "remaining_time": "0:16:49", "throughput": 5532.22, "total_tokens": 13079360} +{"current_steps": 26555, "total_steps": 37885, "loss": 0.0032, "lr": 4.972628693174802e-07, "epoch": 3.504685231622014, "percentage": 70.09, "elapsed_time": "0:39:24", "remaining_time": "0:16:48", "throughput": 5532.48, "total_tokens": 13081792} +{"current_steps": 26560, "total_steps": 37885, "loss": 0.0, "lr": 4.96864677098359e-07, "epoch": 3.5053451233997626, "percentage": 70.11, "elapsed_time": "0:39:24", "remaining_time": "0:16:48", "throughput": 5532.79, "total_tokens": 13084352} +{"current_steps": 26565, "total_steps": 37885, "loss": 0.0611, "lr": 4.964665916660671e-07, "epoch": 3.506005015177511, "percentage": 70.12, "elapsed_time": "0:39:25", "remaining_time": "0:16:47", "throughput": 5533.04, "total_tokens": 13086784} +{"current_steps": 26570, "total_steps": 37885, "loss": 0.0001, "lr": 4.960686131050945e-07, "epoch": 3.506664906955259, "percentage": 70.13, "elapsed_time": "0:39:25", "remaining_time": "0:16:47", "throughput": 5533.35, "total_tokens": 13089344} +{"current_steps": 26575, "total_steps": 37885, "loss": 0.0, "lr": 4.956707414999095e-07, "epoch": 3.507324798733008, "percentage": 70.15, "elapsed_time": "0:39:25", "remaining_time": "0:16:46", "throughput": 5533.58, "total_tokens": 13091712} +{"current_steps": 26580, "total_steps": 37885, "loss": 0.0, "lr": 4.95272976934958e-07, "epoch": 3.507984690510756, "percentage": 70.16, "elapsed_time": "0:39:26", "remaining_time": "0:16:46", "throughput": 5533.86, "total_tokens": 13094208} +{"current_steps": 26585, "total_steps": 37885, "loss": 0.0759, "lr": 4.948753194946617e-07, "epoch": 3.508644582288505, "percentage": 70.17, "elapsed_time": "0:39:26", "remaining_time": "0:16:45", "throughput": 5534.17, "total_tokens": 13096768} +{"current_steps": 26590, "total_steps": 37885, "loss": 0.0011, "lr": 4.944777692634211e-07, "epoch": 3.509304474066253, "percentage": 70.19, "elapsed_time": "0:39:26", "remaining_time": "0:16:45", "throughput": 5534.46, "total_tokens": 13099264} +{"current_steps": 26595, "total_steps": 37885, "loss": 0.0011, "lr": 4.940803263256133e-07, "epoch": 3.5099643658440014, "percentage": 70.2, "elapsed_time": "0:39:27", "remaining_time": "0:16:44", "throughput": 5534.81, "total_tokens": 13101952} +{"current_steps": 26600, "total_steps": 37885, "loss": 0.0, "lr": 4.936829907655929e-07, "epoch": 3.51062425762175, "percentage": 70.21, "elapsed_time": "0:39:27", "remaining_time": "0:16:44", "throughput": 5535.19, "total_tokens": 13104704} +{"current_steps": 26605, "total_steps": 37885, "loss": 0.0, "lr": 4.932857626676914e-07, "epoch": 3.5112841493994984, "percentage": 70.23, "elapsed_time": "0:39:27", "remaining_time": "0:16:43", "throughput": 5535.42, "total_tokens": 13107072} +{"current_steps": 26610, "total_steps": 37885, "loss": 0.0846, "lr": 4.928886421162166e-07, "epoch": 3.511944041177247, "percentage": 70.24, "elapsed_time": "0:39:28", "remaining_time": "0:16:43", "throughput": 5535.63, "total_tokens": 13109376} +{"current_steps": 26615, "total_steps": 37885, "loss": 0.0518, "lr": 4.924916291954561e-07, "epoch": 3.5126039329549954, "percentage": 70.25, "elapsed_time": "0:39:28", "remaining_time": "0:16:42", "throughput": 5535.94, "total_tokens": 13111936} +{"current_steps": 26620, "total_steps": 37885, "loss": 0.0, "lr": 4.920947239896717e-07, "epoch": 3.5132638247327437, "percentage": 70.27, "elapsed_time": "0:39:28", "remaining_time": "0:16:42", "throughput": 5536.21, "total_tokens": 13114432} +{"current_steps": 26625, "total_steps": 37885, "loss": 0.0, "lr": 4.916979265831043e-07, "epoch": 3.5139237165104924, "percentage": 70.28, "elapsed_time": "0:39:29", "remaining_time": "0:16:41", "throughput": 5536.45, "total_tokens": 13116800} +{"current_steps": 26630, "total_steps": 37885, "loss": 0.0003, "lr": 4.913012370599715e-07, "epoch": 3.5145836082882407, "percentage": 70.29, "elapsed_time": "0:39:29", "remaining_time": "0:16:41", "throughput": 5536.73, "total_tokens": 13119296} +{"current_steps": 26635, "total_steps": 37885, "loss": 0.0001, "lr": 4.909046555044672e-07, "epoch": 3.5152435000659894, "percentage": 70.3, "elapsed_time": "0:39:29", "remaining_time": "0:16:40", "throughput": 5536.96, "total_tokens": 13121664} +{"current_steps": 26640, "total_steps": 37885, "loss": 0.0035, "lr": 4.905081820007634e-07, "epoch": 3.5159033918437377, "percentage": 70.32, "elapsed_time": "0:39:30", "remaining_time": "0:16:40", "throughput": 5537.29, "total_tokens": 13124288} +{"current_steps": 26645, "total_steps": 37885, "loss": 0.0002, "lr": 4.901118166330077e-07, "epoch": 3.516563283621486, "percentage": 70.33, "elapsed_time": "0:39:30", "remaining_time": "0:16:39", "throughput": 5537.57, "total_tokens": 13126784} +{"current_steps": 26650, "total_steps": 37885, "loss": 0.0004, "lr": 4.897155594853275e-07, "epoch": 3.5172231753992347, "percentage": 70.34, "elapsed_time": "0:39:30", "remaining_time": "0:16:39", "throughput": 5537.81, "total_tokens": 13129216} +{"current_steps": 26655, "total_steps": 37885, "loss": 0.0004, "lr": 4.893194106418246e-07, "epoch": 3.517883067176983, "percentage": 70.36, "elapsed_time": "0:39:31", "remaining_time": "0:16:38", "throughput": 5537.98, "total_tokens": 13131456} +{"current_steps": 26660, "total_steps": 37885, "loss": 0.0004, "lr": 4.889233701865782e-07, "epoch": 3.5185429589547317, "percentage": 70.37, "elapsed_time": "0:39:31", "remaining_time": "0:16:38", "throughput": 5538.19, "total_tokens": 13133824} +{"current_steps": 26665, "total_steps": 37885, "loss": 0.0104, "lr": 4.885274382036457e-07, "epoch": 3.51920285073248, "percentage": 70.38, "elapsed_time": "0:39:31", "remaining_time": "0:16:38", "throughput": 5538.42, "total_tokens": 13136256} +{"current_steps": 26670, "total_steps": 37885, "loss": 0.0014, "lr": 4.881316147770607e-07, "epoch": 3.5198627425102282, "percentage": 70.4, "elapsed_time": "0:39:32", "remaining_time": "0:16:37", "throughput": 5538.75, "total_tokens": 13138944} +{"current_steps": 26675, "total_steps": 37885, "loss": 0.0, "lr": 4.877358999908339e-07, "epoch": 3.5205226342879765, "percentage": 70.41, "elapsed_time": "0:39:32", "remaining_time": "0:16:37", "throughput": 5539.04, "total_tokens": 13141504} +{"current_steps": 26680, "total_steps": 37885, "loss": 0.0001, "lr": 4.873402939289527e-07, "epoch": 3.5211825260657252, "percentage": 70.42, "elapsed_time": "0:39:32", "remaining_time": "0:16:36", "throughput": 5539.12, "total_tokens": 13143552} +{"current_steps": 26685, "total_steps": 37885, "loss": 0.0, "lr": 4.869447966753816e-07, "epoch": 3.5218424178434735, "percentage": 70.44, "elapsed_time": "0:39:33", "remaining_time": "0:16:36", "throughput": 5539.38, "total_tokens": 13146048} +{"current_steps": 26690, "total_steps": 37885, "loss": 0.0, "lr": 4.865494083140627e-07, "epoch": 3.5225023096212222, "percentage": 70.45, "elapsed_time": "0:39:33", "remaining_time": "0:16:35", "throughput": 5539.53, "total_tokens": 13148288} +{"current_steps": 26695, "total_steps": 37885, "loss": 0.0001, "lr": 4.861541289289131e-07, "epoch": 3.5231622013989705, "percentage": 70.46, "elapsed_time": "0:39:33", "remaining_time": "0:16:35", "throughput": 5539.77, "total_tokens": 13150720} +{"current_steps": 26700, "total_steps": 37885, "loss": 0.0381, "lr": 4.857589586038289e-07, "epoch": 3.523822093176719, "percentage": 70.48, "elapsed_time": "0:39:34", "remaining_time": "0:16:34", "throughput": 5540.11, "total_tokens": 13153344} +{"current_steps": 26705, "total_steps": 37885, "loss": 0.0044, "lr": 4.853638974226822e-07, "epoch": 3.5244819849544675, "percentage": 70.49, "elapsed_time": "0:39:34", "remaining_time": "0:16:34", "throughput": 5540.39, "total_tokens": 13155840} +{"current_steps": 26710, "total_steps": 37885, "loss": 0.0003, "lr": 4.849689454693212e-07, "epoch": 3.525141876732216, "percentage": 70.5, "elapsed_time": "0:39:34", "remaining_time": "0:16:33", "throughput": 5540.64, "total_tokens": 13158272} +{"current_steps": 26715, "total_steps": 37885, "loss": 0.0004, "lr": 4.845741028275719e-07, "epoch": 3.5258017685099645, "percentage": 70.52, "elapsed_time": "0:39:35", "remaining_time": "0:16:33", "throughput": 5540.88, "total_tokens": 13160640} +{"current_steps": 26720, "total_steps": 37885, "loss": 0.0001, "lr": 4.841793695812369e-07, "epoch": 3.526461660287713, "percentage": 70.53, "elapsed_time": "0:39:35", "remaining_time": "0:16:32", "throughput": 5541.11, "total_tokens": 13163008} +{"current_steps": 26725, "total_steps": 37885, "loss": 0.075, "lr": 4.837847458140959e-07, "epoch": 3.527121552065461, "percentage": 70.54, "elapsed_time": "0:39:35", "remaining_time": "0:16:32", "throughput": 5541.37, "total_tokens": 13165440} +{"current_steps": 26730, "total_steps": 37885, "loss": 0.0, "lr": 4.833902316099039e-07, "epoch": 3.52778144384321, "percentage": 70.56, "elapsed_time": "0:39:36", "remaining_time": "0:16:31", "throughput": 5541.55, "total_tokens": 13167680} +{"current_steps": 26735, "total_steps": 37885, "loss": 0.0, "lr": 4.829958270523944e-07, "epoch": 3.528441335620958, "percentage": 70.57, "elapsed_time": "0:39:36", "remaining_time": "0:16:31", "throughput": 5541.66, "total_tokens": 13169728} +{"current_steps": 26740, "total_steps": 37885, "loss": 0.0626, "lr": 4.82601532225277e-07, "epoch": 3.529101227398707, "percentage": 70.58, "elapsed_time": "0:39:36", "remaining_time": "0:16:30", "throughput": 5541.89, "total_tokens": 13172096} +{"current_steps": 26745, "total_steps": 37885, "loss": 0.0001, "lr": 4.822073472122374e-07, "epoch": 3.529761119176455, "percentage": 70.6, "elapsed_time": "0:39:37", "remaining_time": "0:16:30", "throughput": 5542.15, "total_tokens": 13174528} +{"current_steps": 26750, "total_steps": 37885, "loss": 0.0, "lr": 4.818132720969387e-07, "epoch": 3.5304210109542034, "percentage": 70.61, "elapsed_time": "0:39:37", "remaining_time": "0:16:29", "throughput": 5542.4, "total_tokens": 13176960} +{"current_steps": 26755, "total_steps": 37885, "loss": 0.002, "lr": 4.814193069630211e-07, "epoch": 3.531080902731952, "percentage": 70.62, "elapsed_time": "0:39:37", "remaining_time": "0:16:29", "throughput": 5542.63, "total_tokens": 13179328} +{"current_steps": 26760, "total_steps": 37885, "loss": 0.0, "lr": 4.810254518941e-07, "epoch": 3.5317407945097004, "percentage": 70.63, "elapsed_time": "0:39:38", "remaining_time": "0:16:28", "throughput": 5542.9, "total_tokens": 13181824} +{"current_steps": 26765, "total_steps": 37885, "loss": 0.0, "lr": 4.806317069737684e-07, "epoch": 3.532400686287449, "percentage": 70.65, "elapsed_time": "0:39:38", "remaining_time": "0:16:28", "throughput": 5543.15, "total_tokens": 13184256} +{"current_steps": 26770, "total_steps": 37885, "loss": 0.0, "lr": 4.802380722855961e-07, "epoch": 3.5330605780651974, "percentage": 70.66, "elapsed_time": "0:39:38", "remaining_time": "0:16:27", "throughput": 5543.36, "total_tokens": 13186560} +{"current_steps": 26775, "total_steps": 37885, "loss": 0.0, "lr": 4.798445479131295e-07, "epoch": 3.5337204698429456, "percentage": 70.67, "elapsed_time": "0:39:39", "remaining_time": "0:16:27", "throughput": 5543.64, "total_tokens": 13189120} +{"current_steps": 26780, "total_steps": 37885, "loss": 0.0, "lr": 4.794511339398911e-07, "epoch": 3.5343803616206944, "percentage": 70.69, "elapsed_time": "0:39:39", "remaining_time": "0:16:26", "throughput": 5543.89, "total_tokens": 13191552} +{"current_steps": 26785, "total_steps": 37885, "loss": 0.0001, "lr": 4.790578304493791e-07, "epoch": 3.5350402533984426, "percentage": 70.7, "elapsed_time": "0:39:39", "remaining_time": "0:16:26", "throughput": 5544.09, "total_tokens": 13193856} +{"current_steps": 26790, "total_steps": 37885, "loss": 0.0891, "lr": 4.786646375250711e-07, "epoch": 3.5357001451761914, "percentage": 70.71, "elapsed_time": "0:39:40", "remaining_time": "0:16:25", "throughput": 5544.35, "total_tokens": 13196288} +{"current_steps": 26795, "total_steps": 37885, "loss": 0.0003, "lr": 4.78271555250418e-07, "epoch": 3.5363600369539396, "percentage": 70.73, "elapsed_time": "0:39:40", "remaining_time": "0:16:25", "throughput": 5544.59, "total_tokens": 13198720} +{"current_steps": 26800, "total_steps": 37885, "loss": 0.0001, "lr": 4.778785837088497e-07, "epoch": 3.537019928731688, "percentage": 70.74, "elapsed_time": "0:39:40", "remaining_time": "0:16:24", "throughput": 5544.84, "total_tokens": 13201152} +{"current_steps": 26805, "total_steps": 37885, "loss": 0.0001, "lr": 4.774857229837708e-07, "epoch": 3.537679820509436, "percentage": 70.75, "elapsed_time": "0:39:41", "remaining_time": "0:16:24", "throughput": 5545.08, "total_tokens": 13203584} +{"current_steps": 26810, "total_steps": 37885, "loss": 0.0, "lr": 4.770929731585634e-07, "epoch": 3.538339712287185, "percentage": 70.77, "elapsed_time": "0:39:41", "remaining_time": "0:16:23", "throughput": 5545.34, "total_tokens": 13206016} +{"current_steps": 26815, "total_steps": 37885, "loss": 0.0797, "lr": 4.7670033431658605e-07, "epoch": 3.538999604064933, "percentage": 70.78, "elapsed_time": "0:39:41", "remaining_time": "0:16:23", "throughput": 5545.51, "total_tokens": 13208256} +{"current_steps": 26820, "total_steps": 37885, "loss": 0.0, "lr": 4.7630780654117273e-07, "epoch": 3.539659495842682, "percentage": 70.79, "elapsed_time": "0:39:42", "remaining_time": "0:16:22", "throughput": 5545.83, "total_tokens": 13210880} +{"current_steps": 26825, "total_steps": 37885, "loss": 0.0001, "lr": 4.7591538991563594e-07, "epoch": 3.54031938762043, "percentage": 70.81, "elapsed_time": "0:39:42", "remaining_time": "0:16:22", "throughput": 5546.06, "total_tokens": 13213248} +{"current_steps": 26830, "total_steps": 37885, "loss": 0.0, "lr": 4.755230845232625e-07, "epoch": 3.5409792793981785, "percentage": 70.82, "elapsed_time": "0:39:42", "remaining_time": "0:16:21", "throughput": 5546.28, "total_tokens": 13215616} +{"current_steps": 26835, "total_steps": 37885, "loss": 0.0213, "lr": 4.7513089044731603e-07, "epoch": 3.541639171175927, "percentage": 70.83, "elapsed_time": "0:39:43", "remaining_time": "0:16:21", "throughput": 5546.49, "total_tokens": 13217920} +{"current_steps": 26840, "total_steps": 37885, "loss": 0.0, "lr": 4.7473880777103725e-07, "epoch": 3.5422990629536755, "percentage": 70.85, "elapsed_time": "0:39:43", "remaining_time": "0:16:20", "throughput": 5546.72, "total_tokens": 13220288} +{"current_steps": 26845, "total_steps": 37885, "loss": 0.0487, "lr": 4.74346836577643e-07, "epoch": 3.542958954731424, "percentage": 70.86, "elapsed_time": "0:39:43", "remaining_time": "0:16:20", "throughput": 5547.09, "total_tokens": 13223040} +{"current_steps": 26850, "total_steps": 37885, "loss": 0.0017, "lr": 4.7395497695032637e-07, "epoch": 3.5436188465091725, "percentage": 70.87, "elapsed_time": "0:39:44", "remaining_time": "0:16:19", "throughput": 5547.21, "total_tokens": 13225152} +{"current_steps": 26855, "total_steps": 37885, "loss": 0.0, "lr": 4.735632289722563e-07, "epoch": 3.5442787382869207, "percentage": 70.89, "elapsed_time": "0:39:44", "remaining_time": "0:16:19", "throughput": 5547.49, "total_tokens": 13227648} +{"current_steps": 26860, "total_steps": 37885, "loss": 0.0, "lr": 4.731715927265787e-07, "epoch": 3.5449386300646695, "percentage": 70.9, "elapsed_time": "0:39:44", "remaining_time": "0:16:18", "throughput": 5547.73, "total_tokens": 13230080} +{"current_steps": 26865, "total_steps": 37885, "loss": 0.1657, "lr": 4.727800682964159e-07, "epoch": 3.5455985218424177, "percentage": 70.91, "elapsed_time": "0:39:45", "remaining_time": "0:16:18", "throughput": 5548.07, "total_tokens": 13232768} +{"current_steps": 26870, "total_steps": 37885, "loss": 0.0, "lr": 4.723886557648655e-07, "epoch": 3.5462584136201665, "percentage": 70.93, "elapsed_time": "0:39:45", "remaining_time": "0:16:17", "throughput": 5548.25, "total_tokens": 13235008} +{"current_steps": 26875, "total_steps": 37885, "loss": 0.0839, "lr": 4.719973552150022e-07, "epoch": 3.5469183053979148, "percentage": 70.94, "elapsed_time": "0:39:45", "remaining_time": "0:16:17", "throughput": 5548.59, "total_tokens": 13237696} +{"current_steps": 26880, "total_steps": 37885, "loss": 0.0, "lr": 4.7160616672987674e-07, "epoch": 3.547578197175663, "percentage": 70.95, "elapsed_time": "0:39:46", "remaining_time": "0:16:16", "throughput": 5548.84, "total_tokens": 13240192} +{"current_steps": 26885, "total_steps": 37885, "loss": 0.0, "lr": 4.712150903925165e-07, "epoch": 3.5482380889534118, "percentage": 70.96, "elapsed_time": "0:39:46", "remaining_time": "0:16:16", "throughput": 5549.05, "total_tokens": 13242496} +{"current_steps": 26890, "total_steps": 37885, "loss": 0.0, "lr": 4.708241262859237e-07, "epoch": 3.54889798073116, "percentage": 70.98, "elapsed_time": "0:39:46", "remaining_time": "0:16:15", "throughput": 5549.26, "total_tokens": 13244864} +{"current_steps": 26895, "total_steps": 37885, "loss": 0.0, "lr": 4.7043327449307813e-07, "epoch": 3.5495578725089088, "percentage": 70.99, "elapsed_time": "0:39:47", "remaining_time": "0:16:15", "throughput": 5549.43, "total_tokens": 13247104} +{"current_steps": 26900, "total_steps": 37885, "loss": 0.0, "lr": 4.700425350969357e-07, "epoch": 3.550217764286657, "percentage": 71.0, "elapsed_time": "0:39:47", "remaining_time": "0:16:14", "throughput": 5549.67, "total_tokens": 13249536} +{"current_steps": 26905, "total_steps": 37885, "loss": 0.001, "lr": 4.696519081804271e-07, "epoch": 3.5508776560644053, "percentage": 71.02, "elapsed_time": "0:39:47", "remaining_time": "0:16:14", "throughput": 5549.89, "total_tokens": 13251904} +{"current_steps": 26910, "total_steps": 37885, "loss": 0.0, "lr": 4.6926139382646045e-07, "epoch": 3.551537547842154, "percentage": 71.03, "elapsed_time": "0:39:48", "remaining_time": "0:16:13", "throughput": 5550.13, "total_tokens": 13254336} +{"current_steps": 26915, "total_steps": 37885, "loss": 0.0, "lr": 4.6887099211792016e-07, "epoch": 3.5521974396199023, "percentage": 71.04, "elapsed_time": "0:39:48", "remaining_time": "0:16:13", "throughput": 5550.5, "total_tokens": 13257088} +{"current_steps": 26920, "total_steps": 37885, "loss": 0.0427, "lr": 4.6848070313766507e-07, "epoch": 3.552857331397651, "percentage": 71.06, "elapsed_time": "0:39:48", "remaining_time": "0:16:12", "throughput": 5550.77, "total_tokens": 13259584} +{"current_steps": 26925, "total_steps": 37885, "loss": 0.0, "lr": 4.68090526968532e-07, "epoch": 3.5535172231753993, "percentage": 71.07, "elapsed_time": "0:39:49", "remaining_time": "0:16:12", "throughput": 5551.07, "total_tokens": 13262208} +{"current_steps": 26930, "total_steps": 37885, "loss": 0.052, "lr": 4.677004636933327e-07, "epoch": 3.5541771149531476, "percentage": 71.08, "elapsed_time": "0:39:49", "remaining_time": "0:16:12", "throughput": 5551.33, "total_tokens": 13264704} +{"current_steps": 26935, "total_steps": 37885, "loss": 0.0, "lr": 4.673105133948557e-07, "epoch": 3.554837006730896, "percentage": 71.1, "elapsed_time": "0:39:49", "remaining_time": "0:16:11", "throughput": 5551.45, "total_tokens": 13266816} +{"current_steps": 26940, "total_steps": 37885, "loss": 0.0001, "lr": 4.6692067615586493e-07, "epoch": 3.5554968985086446, "percentage": 71.11, "elapsed_time": "0:39:50", "remaining_time": "0:16:11", "throughput": 5551.63, "total_tokens": 13269120} +{"current_steps": 26945, "total_steps": 37885, "loss": 0.0001, "lr": 4.6653095205909955e-07, "epoch": 3.556156790286393, "percentage": 71.12, "elapsed_time": "0:39:50", "remaining_time": "0:16:10", "throughput": 5551.73, "total_tokens": 13271232} +{"current_steps": 26950, "total_steps": 37885, "loss": 0.0, "lr": 4.661413411872772e-07, "epoch": 3.5568166820641416, "percentage": 71.14, "elapsed_time": "0:39:50", "remaining_time": "0:16:10", "throughput": 5551.91, "total_tokens": 13273536} +{"current_steps": 26955, "total_steps": 37885, "loss": 0.0281, "lr": 4.6575184362308904e-07, "epoch": 3.55747657384189, "percentage": 71.15, "elapsed_time": "0:39:51", "remaining_time": "0:16:09", "throughput": 5552.22, "total_tokens": 13276160} +{"current_steps": 26960, "total_steps": 37885, "loss": 0.0, "lr": 4.653624594492033e-07, "epoch": 3.558136465619638, "percentage": 71.16, "elapsed_time": "0:39:51", "remaining_time": "0:16:09", "throughput": 5552.35, "total_tokens": 13278336} +{"current_steps": 26965, "total_steps": 37885, "loss": 0.0001, "lr": 4.649731887482644e-07, "epoch": 3.558796357397387, "percentage": 71.18, "elapsed_time": "0:39:51", "remaining_time": "0:16:08", "throughput": 5552.6, "total_tokens": 13280832} +{"current_steps": 26970, "total_steps": 37885, "loss": 0.0, "lr": 4.645840316028914e-07, "epoch": 3.559456249175135, "percentage": 71.19, "elapsed_time": "0:39:52", "remaining_time": "0:16:08", "throughput": 5552.86, "total_tokens": 13283328} +{"current_steps": 26975, "total_steps": 37885, "loss": 0.002, "lr": 4.641949880956809e-07, "epoch": 3.560116140952884, "percentage": 71.2, "elapsed_time": "0:39:52", "remaining_time": "0:16:07", "throughput": 5553.04, "total_tokens": 13285632} +{"current_steps": 26980, "total_steps": 37885, "loss": 0.0003, "lr": 4.638060583092035e-07, "epoch": 3.560776032730632, "percentage": 71.22, "elapsed_time": "0:39:52", "remaining_time": "0:16:07", "throughput": 5553.2, "total_tokens": 13287872} +{"current_steps": 26985, "total_steps": 37885, "loss": 0.069, "lr": 4.634172423260081e-07, "epoch": 3.5614359245083804, "percentage": 71.23, "elapsed_time": "0:39:53", "remaining_time": "0:16:06", "throughput": 5553.52, "total_tokens": 13290560} +{"current_steps": 26990, "total_steps": 37885, "loss": 0.0322, "lr": 4.6302854022861735e-07, "epoch": 3.562095816286129, "percentage": 71.24, "elapsed_time": "0:39:53", "remaining_time": "0:16:06", "throughput": 5553.77, "total_tokens": 13293056} +{"current_steps": 26995, "total_steps": 37885, "loss": 0.0, "lr": 4.6263995209953024e-07, "epoch": 3.5627557080638774, "percentage": 71.26, "elapsed_time": "0:39:53", "remaining_time": "0:16:05", "throughput": 5554.0, "total_tokens": 13295488} +{"current_steps": 27000, "total_steps": 37885, "loss": 0.0, "lr": 4.622514780212219e-07, "epoch": 3.563415599841626, "percentage": 71.27, "elapsed_time": "0:39:54", "remaining_time": "0:16:05", "throughput": 5554.22, "total_tokens": 13297856} +{"current_steps": 27005, "total_steps": 37885, "loss": 0.0322, "lr": 4.618631180761434e-07, "epoch": 3.5640754916193744, "percentage": 71.28, "elapsed_time": "0:39:54", "remaining_time": "0:16:04", "throughput": 5554.51, "total_tokens": 13300416} +{"current_steps": 27010, "total_steps": 37885, "loss": 0.0, "lr": 4.6147487234672156e-07, "epoch": 3.5647353833971227, "percentage": 71.29, "elapsed_time": "0:39:54", "remaining_time": "0:16:04", "throughput": 5554.77, "total_tokens": 13302848} +{"current_steps": 27015, "total_steps": 37885, "loss": 0.0, "lr": 4.6108674091535795e-07, "epoch": 3.5653952751748714, "percentage": 71.31, "elapsed_time": "0:39:55", "remaining_time": "0:16:03", "throughput": 5555.04, "total_tokens": 13305344} +{"current_steps": 27020, "total_steps": 37885, "loss": 0.0, "lr": 4.6069872386443107e-07, "epoch": 3.5660551669526197, "percentage": 71.32, "elapsed_time": "0:39:55", "remaining_time": "0:16:03", "throughput": 5555.32, "total_tokens": 13307840} +{"current_steps": 27025, "total_steps": 37885, "loss": 0.0323, "lr": 4.6031082127629514e-07, "epoch": 3.5667150587303684, "percentage": 71.33, "elapsed_time": "0:39:55", "remaining_time": "0:16:02", "throughput": 5555.54, "total_tokens": 13310208} +{"current_steps": 27030, "total_steps": 37885, "loss": 0.0001, "lr": 4.5992303323327885e-07, "epoch": 3.5673749505081167, "percentage": 71.35, "elapsed_time": "0:39:56", "remaining_time": "0:16:02", "throughput": 5555.76, "total_tokens": 13312576} +{"current_steps": 27035, "total_steps": 37885, "loss": 0.0004, "lr": 4.5953535981768786e-07, "epoch": 3.568034842285865, "percentage": 71.36, "elapsed_time": "0:39:56", "remaining_time": "0:16:01", "throughput": 5555.9, "total_tokens": 13314752} +{"current_steps": 27040, "total_steps": 37885, "loss": 0.0, "lr": 4.591478011118034e-07, "epoch": 3.5686947340636137, "percentage": 71.37, "elapsed_time": "0:39:56", "remaining_time": "0:16:01", "throughput": 5556.14, "total_tokens": 13317184} +{"current_steps": 27045, "total_steps": 37885, "loss": 0.0585, "lr": 4.5876035719788133e-07, "epoch": 3.569354625841362, "percentage": 71.39, "elapsed_time": "0:39:57", "remaining_time": "0:16:00", "throughput": 5556.58, "total_tokens": 13320128} +{"current_steps": 27050, "total_steps": 37885, "loss": 0.0002, "lr": 4.5837302815815394e-07, "epoch": 3.5700145176191107, "percentage": 71.4, "elapsed_time": "0:39:57", "remaining_time": "0:16:00", "throughput": 5556.92, "total_tokens": 13322816} +{"current_steps": 27055, "total_steps": 37885, "loss": 0.0, "lr": 4.5798581407482927e-07, "epoch": 3.570674409396859, "percentage": 71.41, "elapsed_time": "0:39:57", "remaining_time": "0:15:59", "throughput": 5557.17, "total_tokens": 13325248} +{"current_steps": 27060, "total_steps": 37885, "loss": 0.0693, "lr": 4.5759871503009097e-07, "epoch": 3.5713343011746073, "percentage": 71.43, "elapsed_time": "0:39:58", "remaining_time": "0:15:59", "throughput": 5557.42, "total_tokens": 13327680} +{"current_steps": 27065, "total_steps": 37885, "loss": 0.0, "lr": 4.572117311060972e-07, "epoch": 3.5719941929523555, "percentage": 71.44, "elapsed_time": "0:39:58", "remaining_time": "0:15:58", "throughput": 5557.61, "total_tokens": 13329984} +{"current_steps": 27070, "total_steps": 37885, "loss": 0.0719, "lr": 4.56824862384983e-07, "epoch": 3.5726540847301043, "percentage": 71.45, "elapsed_time": "0:39:58", "remaining_time": "0:15:58", "throughput": 5557.81, "total_tokens": 13332288} +{"current_steps": 27075, "total_steps": 37885, "loss": 0.0176, "lr": 4.564381089488587e-07, "epoch": 3.573313976507853, "percentage": 71.47, "elapsed_time": "0:39:59", "remaining_time": "0:15:57", "throughput": 5557.93, "total_tokens": 13334400} +{"current_steps": 27080, "total_steps": 37885, "loss": 0.1063, "lr": 4.560514708798093e-07, "epoch": 3.5739738682856013, "percentage": 71.48, "elapsed_time": "0:39:59", "remaining_time": "0:15:57", "throughput": 5558.24, "total_tokens": 13337024} +{"current_steps": 27085, "total_steps": 37885, "loss": 0.0, "lr": 4.556649482598962e-07, "epoch": 3.5746337600633495, "percentage": 71.49, "elapsed_time": "0:39:59", "remaining_time": "0:15:56", "throughput": 5558.42, "total_tokens": 13339328} +{"current_steps": 27090, "total_steps": 37885, "loss": 0.0412, "lr": 4.552785411711565e-07, "epoch": 3.575293651841098, "percentage": 71.51, "elapsed_time": "0:40:00", "remaining_time": "0:15:56", "throughput": 5558.62, "total_tokens": 13341632} +{"current_steps": 27095, "total_steps": 37885, "loss": 0.0011, "lr": 4.548922496956015e-07, "epoch": 3.5759535436188465, "percentage": 71.52, "elapsed_time": "0:40:00", "remaining_time": "0:15:55", "throughput": 5558.82, "total_tokens": 13343936} +{"current_steps": 27100, "total_steps": 37885, "loss": 0.0, "lr": 4.54506073915219e-07, "epoch": 3.576613435396595, "percentage": 71.53, "elapsed_time": "0:40:00", "remaining_time": "0:15:55", "throughput": 5559.19, "total_tokens": 13346688} +{"current_steps": 27105, "total_steps": 37885, "loss": 0.0001, "lr": 4.541200139119723e-07, "epoch": 3.5772733271743435, "percentage": 71.55, "elapsed_time": "0:40:01", "remaining_time": "0:15:54", "throughput": 5559.53, "total_tokens": 13349376} +{"current_steps": 27110, "total_steps": 37885, "loss": 0.0673, "lr": 4.537340697678e-07, "epoch": 3.577933218952092, "percentage": 71.56, "elapsed_time": "0:40:01", "remaining_time": "0:15:54", "throughput": 5559.72, "total_tokens": 13351680} +{"current_steps": 27115, "total_steps": 37885, "loss": 0.0003, "lr": 4.533482415646157e-07, "epoch": 3.57859311072984, "percentage": 71.57, "elapsed_time": "0:40:01", "remaining_time": "0:15:54", "throughput": 5559.94, "total_tokens": 13354048} +{"current_steps": 27120, "total_steps": 37885, "loss": 0.004, "lr": 4.529625293843078e-07, "epoch": 3.579253002507589, "percentage": 71.59, "elapsed_time": "0:40:02", "remaining_time": "0:15:53", "throughput": 5560.16, "total_tokens": 13356416} +{"current_steps": 27125, "total_steps": 37885, "loss": 0.0, "lr": 4.525769333087425e-07, "epoch": 3.579912894285337, "percentage": 71.6, "elapsed_time": "0:40:02", "remaining_time": "0:15:53", "throughput": 5560.32, "total_tokens": 13358592} +{"current_steps": 27130, "total_steps": 37885, "loss": 0.0, "lr": 4.521914534197585e-07, "epoch": 3.580572786063086, "percentage": 71.61, "elapsed_time": "0:40:02", "remaining_time": "0:15:52", "throughput": 5560.65, "total_tokens": 13361216} +{"current_steps": 27135, "total_steps": 37885, "loss": 0.0, "lr": 4.518060897991721e-07, "epoch": 3.581232677840834, "percentage": 71.62, "elapsed_time": "0:40:03", "remaining_time": "0:15:52", "throughput": 5560.79, "total_tokens": 13363392} +{"current_steps": 27140, "total_steps": 37885, "loss": 0.0, "lr": 4.51420842528773e-07, "epoch": 3.5818925696185824, "percentage": 71.64, "elapsed_time": "0:40:03", "remaining_time": "0:15:51", "throughput": 5561.02, "total_tokens": 13365760} +{"current_steps": 27145, "total_steps": 37885, "loss": 0.0, "lr": 4.510357116903275e-07, "epoch": 3.582552461396331, "percentage": 71.65, "elapsed_time": "0:40:03", "remaining_time": "0:15:51", "throughput": 5561.2, "total_tokens": 13368000} +{"current_steps": 27150, "total_steps": 37885, "loss": 0.0, "lr": 4.5065069736557737e-07, "epoch": 3.5832123531740794, "percentage": 71.66, "elapsed_time": "0:40:04", "remaining_time": "0:15:50", "throughput": 5561.43, "total_tokens": 13370368} +{"current_steps": 27155, "total_steps": 37885, "loss": 0.0, "lr": 4.502657996362379e-07, "epoch": 3.583872244951828, "percentage": 71.68, "elapsed_time": "0:40:04", "remaining_time": "0:15:50", "throughput": 5561.85, "total_tokens": 13373248} +{"current_steps": 27160, "total_steps": 37885, "loss": 0.0005, "lr": 4.498810185840023e-07, "epoch": 3.5845321367295764, "percentage": 71.69, "elapsed_time": "0:40:04", "remaining_time": "0:15:49", "throughput": 5562.03, "total_tokens": 13375488} +{"current_steps": 27165, "total_steps": 37885, "loss": 0.0873, "lr": 4.494963542905369e-07, "epoch": 3.5851920285073247, "percentage": 71.7, "elapsed_time": "0:40:05", "remaining_time": "0:15:49", "throughput": 5562.26, "total_tokens": 13377856} +{"current_steps": 27170, "total_steps": 37885, "loss": 0.0007, "lr": 4.491118068374835e-07, "epoch": 3.5858519202850734, "percentage": 71.72, "elapsed_time": "0:40:05", "remaining_time": "0:15:48", "throughput": 5562.61, "total_tokens": 13380544} +{"current_steps": 27175, "total_steps": 37885, "loss": 0.0, "lr": 4.4872737630645984e-07, "epoch": 3.5865118120628217, "percentage": 71.73, "elapsed_time": "0:40:05", "remaining_time": "0:15:48", "throughput": 5562.84, "total_tokens": 13382912} +{"current_steps": 27180, "total_steps": 37885, "loss": 0.0001, "lr": 4.4834306277905855e-07, "epoch": 3.5871717038405704, "percentage": 71.74, "elapsed_time": "0:40:06", "remaining_time": "0:15:47", "throughput": 5563.02, "total_tokens": 13385152} +{"current_steps": 27185, "total_steps": 37885, "loss": 0.0, "lr": 4.4795886633684776e-07, "epoch": 3.5878315956183187, "percentage": 71.76, "elapsed_time": "0:40:06", "remaining_time": "0:15:47", "throughput": 5563.2, "total_tokens": 13387392} +{"current_steps": 27190, "total_steps": 37885, "loss": 0.0472, "lr": 4.4757478706136974e-07, "epoch": 3.588491487396067, "percentage": 71.77, "elapsed_time": "0:40:06", "remaining_time": "0:15:46", "throughput": 5563.4, "total_tokens": 13389696} +{"current_steps": 27195, "total_steps": 37885, "loss": 0.0004, "lr": 4.4719082503414273e-07, "epoch": 3.5891513791738157, "percentage": 71.78, "elapsed_time": "0:40:07", "remaining_time": "0:15:46", "throughput": 5563.56, "total_tokens": 13391872} +{"current_steps": 27200, "total_steps": 37885, "loss": 0.0, "lr": 4.468069803366604e-07, "epoch": 3.589811270951564, "percentage": 71.8, "elapsed_time": "0:40:07", "remaining_time": "0:15:45", "throughput": 5563.71, "total_tokens": 13394048} +{"current_steps": 27205, "total_steps": 37885, "loss": 0.0, "lr": 4.464232530503902e-07, "epoch": 3.5904711627293127, "percentage": 71.81, "elapsed_time": "0:40:07", "remaining_time": "0:15:45", "throughput": 5564.03, "total_tokens": 13396672} +{"current_steps": 27210, "total_steps": 37885, "loss": 0.1157, "lr": 4.460396432567759e-07, "epoch": 3.591131054507061, "percentage": 71.82, "elapsed_time": "0:40:08", "remaining_time": "0:15:44", "throughput": 5564.34, "total_tokens": 13399232} +{"current_steps": 27215, "total_steps": 37885, "loss": 0.0, "lr": 4.456561510372358e-07, "epoch": 3.591790946284809, "percentage": 71.84, "elapsed_time": "0:40:08", "remaining_time": "0:15:44", "throughput": 5564.57, "total_tokens": 13401600} +{"current_steps": 27220, "total_steps": 37885, "loss": 0.0, "lr": 4.4527277647316375e-07, "epoch": 3.5924508380625575, "percentage": 71.85, "elapsed_time": "0:40:08", "remaining_time": "0:15:43", "throughput": 5564.87, "total_tokens": 13404160} +{"current_steps": 27225, "total_steps": 37885, "loss": 0.0016, "lr": 4.448895196459275e-07, "epoch": 3.593110729840306, "percentage": 71.86, "elapsed_time": "0:40:09", "remaining_time": "0:15:43", "throughput": 5565.12, "total_tokens": 13406592} +{"current_steps": 27230, "total_steps": 37885, "loss": 0.0012, "lr": 4.4450638063687094e-07, "epoch": 3.5937706216180545, "percentage": 71.88, "elapsed_time": "0:40:09", "remaining_time": "0:15:42", "throughput": 5565.42, "total_tokens": 13409152} +{"current_steps": 27235, "total_steps": 37885, "loss": 0.0001, "lr": 4.4412335952731284e-07, "epoch": 3.594430513395803, "percentage": 71.89, "elapsed_time": "0:40:09", "remaining_time": "0:15:42", "throughput": 5565.75, "total_tokens": 13411776} +{"current_steps": 27240, "total_steps": 37885, "loss": 0.0009, "lr": 4.437404563985461e-07, "epoch": 3.5950904051735515, "percentage": 71.9, "elapsed_time": "0:40:10", "remaining_time": "0:15:41", "throughput": 5566.02, "total_tokens": 13414272} +{"current_steps": 27245, "total_steps": 37885, "loss": 0.0169, "lr": 4.4335767133183923e-07, "epoch": 3.5957502969512998, "percentage": 71.92, "elapsed_time": "0:40:10", "remaining_time": "0:15:41", "throughput": 5566.32, "total_tokens": 13416832} +{"current_steps": 27250, "total_steps": 37885, "loss": 0.075, "lr": 4.4297500440843616e-07, "epoch": 3.5964101887290485, "percentage": 71.93, "elapsed_time": "0:40:10", "remaining_time": "0:15:40", "throughput": 5566.53, "total_tokens": 13419136} +{"current_steps": 27255, "total_steps": 37885, "loss": 0.0004, "lr": 4.4259245570955437e-07, "epoch": 3.5970700805067968, "percentage": 71.94, "elapsed_time": "0:40:11", "remaining_time": "0:15:40", "throughput": 5566.8, "total_tokens": 13421632} +{"current_steps": 27260, "total_steps": 37885, "loss": 0.0001, "lr": 4.422100253163874e-07, "epoch": 3.5977299722845455, "percentage": 71.95, "elapsed_time": "0:40:11", "remaining_time": "0:15:39", "throughput": 5567.02, "total_tokens": 13424000} +{"current_steps": 27265, "total_steps": 37885, "loss": 0.0337, "lr": 4.4182771331010347e-07, "epoch": 3.5983898640622938, "percentage": 71.97, "elapsed_time": "0:40:11", "remaining_time": "0:15:39", "throughput": 5567.24, "total_tokens": 13426368} +{"current_steps": 27270, "total_steps": 37885, "loss": 0.002, "lr": 4.414455197718457e-07, "epoch": 3.599049755840042, "percentage": 71.98, "elapsed_time": "0:40:12", "remaining_time": "0:15:38", "throughput": 5567.42, "total_tokens": 13428608} +{"current_steps": 27275, "total_steps": 37885, "loss": 0.0002, "lr": 4.410634447827316e-07, "epoch": 3.5997096476177908, "percentage": 71.99, "elapsed_time": "0:40:12", "remaining_time": "0:15:38", "throughput": 5567.59, "total_tokens": 13430848} +{"current_steps": 27280, "total_steps": 37885, "loss": 0.1103, "lr": 4.406814884238532e-07, "epoch": 3.600369539395539, "percentage": 72.01, "elapsed_time": "0:40:12", "remaining_time": "0:15:37", "throughput": 5567.84, "total_tokens": 13433280} +{"current_steps": 27285, "total_steps": 37885, "loss": 0.0383, "lr": 4.4029965077627927e-07, "epoch": 3.6010294311732878, "percentage": 72.02, "elapsed_time": "0:40:12", "remaining_time": "0:15:37", "throughput": 5568.04, "total_tokens": 13435584} +{"current_steps": 27290, "total_steps": 37885, "loss": 0.0, "lr": 4.399179319210511e-07, "epoch": 3.601689322951036, "percentage": 72.03, "elapsed_time": "0:40:13", "remaining_time": "0:15:36", "throughput": 5568.32, "total_tokens": 13438080} +{"current_steps": 27295, "total_steps": 37885, "loss": 0.0, "lr": 4.3953633193918606e-07, "epoch": 3.6023492147287843, "percentage": 72.05, "elapsed_time": "0:40:13", "remaining_time": "0:15:36", "throughput": 5568.69, "total_tokens": 13440832} +{"current_steps": 27300, "total_steps": 37885, "loss": 0.1113, "lr": 4.3915485091167647e-07, "epoch": 3.603009106506533, "percentage": 72.06, "elapsed_time": "0:40:13", "remaining_time": "0:15:35", "throughput": 5569.03, "total_tokens": 13443520} +{"current_steps": 27305, "total_steps": 37885, "loss": 0.0, "lr": 4.3877348891948794e-07, "epoch": 3.6036689982842813, "percentage": 72.07, "elapsed_time": "0:40:14", "remaining_time": "0:15:35", "throughput": 5569.23, "total_tokens": 13445824} +{"current_steps": 27310, "total_steps": 37885, "loss": 0.0001, "lr": 4.3839224604356274e-07, "epoch": 3.60432889006203, "percentage": 72.09, "elapsed_time": "0:40:14", "remaining_time": "0:15:34", "throughput": 5569.57, "total_tokens": 13448512} +{"current_steps": 27315, "total_steps": 37885, "loss": 0.0, "lr": 4.3801112236481575e-07, "epoch": 3.6049887818397783, "percentage": 72.1, "elapsed_time": "0:40:14", "remaining_time": "0:15:34", "throughput": 5569.82, "total_tokens": 13450944} +{"current_steps": 27320, "total_steps": 37885, "loss": 0.0001, "lr": 4.3763011796413915e-07, "epoch": 3.6056486736175266, "percentage": 72.11, "elapsed_time": "0:40:15", "remaining_time": "0:15:34", "throughput": 5570.07, "total_tokens": 13453376} +{"current_steps": 27325, "total_steps": 37885, "loss": 0.0001, "lr": 4.372492329223977e-07, "epoch": 3.6063085653952753, "percentage": 72.13, "elapsed_time": "0:40:15", "remaining_time": "0:15:33", "throughput": 5570.37, "total_tokens": 13455936} +{"current_steps": 27330, "total_steps": 37885, "loss": 0.0281, "lr": 4.3686846732043105e-07, "epoch": 3.6069684571730236, "percentage": 72.14, "elapsed_time": "0:40:15", "remaining_time": "0:15:33", "throughput": 5570.69, "total_tokens": 13458560} +{"current_steps": 27335, "total_steps": 37885, "loss": 0.0626, "lr": 4.3648782123905424e-07, "epoch": 3.6076283489507723, "percentage": 72.15, "elapsed_time": "0:40:16", "remaining_time": "0:15:32", "throughput": 5570.89, "total_tokens": 13460864} +{"current_steps": 27340, "total_steps": 37885, "loss": 0.0001, "lr": 4.361072947590568e-07, "epoch": 3.6082882407285206, "percentage": 72.17, "elapsed_time": "0:40:16", "remaining_time": "0:15:32", "throughput": 5571.17, "total_tokens": 13463360} +{"current_steps": 27345, "total_steps": 37885, "loss": 0.0001, "lr": 4.3572688796120307e-07, "epoch": 3.608948132506269, "percentage": 72.18, "elapsed_time": "0:40:16", "remaining_time": "0:15:31", "throughput": 5571.53, "total_tokens": 13466112} +{"current_steps": 27350, "total_steps": 37885, "loss": 0.001, "lr": 4.353466009262309e-07, "epoch": 3.609608024284017, "percentage": 72.19, "elapsed_time": "0:40:17", "remaining_time": "0:15:31", "throughput": 5571.88, "total_tokens": 13468800} +{"current_steps": 27355, "total_steps": 37885, "loss": 0.0226, "lr": 4.3496643373485367e-07, "epoch": 3.610267916061766, "percentage": 72.21, "elapsed_time": "0:40:17", "remaining_time": "0:15:30", "throughput": 5572.15, "total_tokens": 13471296} +{"current_steps": 27360, "total_steps": 37885, "loss": 0.0, "lr": 4.345863864677596e-07, "epoch": 3.610927807839514, "percentage": 72.22, "elapsed_time": "0:40:17", "remaining_time": "0:15:30", "throughput": 5572.39, "total_tokens": 13473728} +{"current_steps": 27365, "total_steps": 37885, "loss": 0.0009, "lr": 4.342064592056103e-07, "epoch": 3.611587699617263, "percentage": 72.23, "elapsed_time": "0:40:18", "remaining_time": "0:15:29", "throughput": 5572.6, "total_tokens": 13476032} +{"current_steps": 27370, "total_steps": 37885, "loss": 0.0, "lr": 4.338266520290428e-07, "epoch": 3.612247591395011, "percentage": 72.24, "elapsed_time": "0:40:18", "remaining_time": "0:15:29", "throughput": 5572.9, "total_tokens": 13478592} +{"current_steps": 27375, "total_steps": 37885, "loss": 0.0688, "lr": 4.3344696501866893e-07, "epoch": 3.6129074831727594, "percentage": 72.26, "elapsed_time": "0:40:18", "remaining_time": "0:15:28", "throughput": 5573.18, "total_tokens": 13481088} +{"current_steps": 27380, "total_steps": 37885, "loss": 0.0001, "lr": 4.330673982550738e-07, "epoch": 3.613567374950508, "percentage": 72.27, "elapsed_time": "0:40:19", "remaining_time": "0:15:28", "throughput": 5573.35, "total_tokens": 13483328} +{"current_steps": 27385, "total_steps": 37885, "loss": 0.0, "lr": 4.326879518188178e-07, "epoch": 3.6142272667282564, "percentage": 72.28, "elapsed_time": "0:40:19", "remaining_time": "0:15:27", "throughput": 5573.66, "total_tokens": 13485888} +{"current_steps": 27390, "total_steps": 37885, "loss": 0.0, "lr": 4.323086257904359e-07, "epoch": 3.614887158506005, "percentage": 72.3, "elapsed_time": "0:40:19", "remaining_time": "0:15:27", "throughput": 5573.98, "total_tokens": 13488512} +{"current_steps": 27395, "total_steps": 37885, "loss": 0.0, "lr": 4.319294202504378e-07, "epoch": 3.6155470502837534, "percentage": 72.31, "elapsed_time": "0:40:20", "remaining_time": "0:15:26", "throughput": 5574.13, "total_tokens": 13490688} +{"current_steps": 27400, "total_steps": 37885, "loss": 0.0, "lr": 4.3155033527930606e-07, "epoch": 3.6162069420615017, "percentage": 72.32, "elapsed_time": "0:40:20", "remaining_time": "0:15:26", "throughput": 5574.32, "total_tokens": 13492992} +{"current_steps": 27405, "total_steps": 37885, "loss": 0.0201, "lr": 4.3117137095749945e-07, "epoch": 3.6168668338392505, "percentage": 72.34, "elapsed_time": "0:40:20", "remaining_time": "0:15:25", "throughput": 5574.55, "total_tokens": 13495360} +{"current_steps": 27410, "total_steps": 37885, "loss": 0.0018, "lr": 4.307925273654505e-07, "epoch": 3.6175267256169987, "percentage": 72.35, "elapsed_time": "0:40:21", "remaining_time": "0:15:25", "throughput": 5574.8, "total_tokens": 13497792} +{"current_steps": 27415, "total_steps": 37885, "loss": 0.0, "lr": 4.3041380458356534e-07, "epoch": 3.6181866173947475, "percentage": 72.36, "elapsed_time": "0:40:21", "remaining_time": "0:15:24", "throughput": 5575.04, "total_tokens": 13500224} +{"current_steps": 27420, "total_steps": 37885, "loss": 0.0, "lr": 4.3003520269222557e-07, "epoch": 3.6188465091724957, "percentage": 72.38, "elapsed_time": "0:40:21", "remaining_time": "0:15:24", "throughput": 5575.19, "total_tokens": 13502400} +{"current_steps": 27425, "total_steps": 37885, "loss": 0.0533, "lr": 4.29656721771787e-07, "epoch": 3.619506400950244, "percentage": 72.39, "elapsed_time": "0:40:22", "remaining_time": "0:15:23", "throughput": 5575.59, "total_tokens": 13505216} +{"current_steps": 27430, "total_steps": 37885, "loss": 0.0549, "lr": 4.292783619025788e-07, "epoch": 3.6201662927279927, "percentage": 72.4, "elapsed_time": "0:40:22", "remaining_time": "0:15:23", "throughput": 5575.79, "total_tokens": 13507520} +{"current_steps": 27435, "total_steps": 37885, "loss": 0.0176, "lr": 4.289001231649054e-07, "epoch": 3.620826184505741, "percentage": 72.42, "elapsed_time": "0:40:22", "remaining_time": "0:15:22", "throughput": 5576.11, "total_tokens": 13510144} +{"current_steps": 27440, "total_steps": 37885, "loss": 0.0595, "lr": 4.285220056390454e-07, "epoch": 3.6214860762834897, "percentage": 72.43, "elapsed_time": "0:40:23", "remaining_time": "0:15:22", "throughput": 5576.39, "total_tokens": 13512640} +{"current_steps": 27445, "total_steps": 37885, "loss": 0.0001, "lr": 4.2814400940525164e-07, "epoch": 3.622145968061238, "percentage": 72.44, "elapsed_time": "0:40:23", "remaining_time": "0:15:21", "throughput": 5576.68, "total_tokens": 13515200} +{"current_steps": 27450, "total_steps": 37885, "loss": 0.0, "lr": 4.2776613454375087e-07, "epoch": 3.6228058598389863, "percentage": 72.46, "elapsed_time": "0:40:23", "remaining_time": "0:15:21", "throughput": 5576.9, "total_tokens": 13517568} +{"current_steps": 27455, "total_steps": 37885, "loss": 0.0004, "lr": 4.2738838113474353e-07, "epoch": 3.623465751616735, "percentage": 72.47, "elapsed_time": "0:40:24", "remaining_time": "0:15:20", "throughput": 5577.17, "total_tokens": 13520064} +{"current_steps": 27460, "total_steps": 37885, "loss": 0.0002, "lr": 4.2701074925840643e-07, "epoch": 3.6241256433944833, "percentage": 72.48, "elapsed_time": "0:40:24", "remaining_time": "0:15:20", "throughput": 5577.5, "total_tokens": 13522688} +{"current_steps": 27465, "total_steps": 37885, "loss": 0.0, "lr": 4.266332389948882e-07, "epoch": 3.624785535172232, "percentage": 72.5, "elapsed_time": "0:40:24", "remaining_time": "0:15:19", "throughput": 5577.84, "total_tokens": 13525376} +{"current_steps": 27470, "total_steps": 37885, "loss": 0.0, "lr": 4.2625585042431347e-07, "epoch": 3.6254454269499803, "percentage": 72.51, "elapsed_time": "0:40:25", "remaining_time": "0:15:19", "throughput": 5578.04, "total_tokens": 13527680} +{"current_steps": 27475, "total_steps": 37885, "loss": 0.0, "lr": 4.258785836267792e-07, "epoch": 3.6261053187277286, "percentage": 72.52, "elapsed_time": "0:40:25", "remaining_time": "0:15:18", "throughput": 5578.28, "total_tokens": 13530112} +{"current_steps": 27480, "total_steps": 37885, "loss": 0.0002, "lr": 4.255014386823582e-07, "epoch": 3.626765210505477, "percentage": 72.54, "elapsed_time": "0:40:25", "remaining_time": "0:15:18", "throughput": 5578.48, "total_tokens": 13532416} +{"current_steps": 27485, "total_steps": 37885, "loss": 0.0, "lr": 4.25124415671097e-07, "epoch": 3.6274251022832256, "percentage": 72.55, "elapsed_time": "0:40:26", "remaining_time": "0:15:18", "throughput": 5578.8, "total_tokens": 13535040} +{"current_steps": 27490, "total_steps": 37885, "loss": 0.0008, "lr": 4.24747514673015e-07, "epoch": 3.628084994060974, "percentage": 72.56, "elapsed_time": "0:40:26", "remaining_time": "0:15:17", "throughput": 5579.02, "total_tokens": 13537408} +{"current_steps": 27495, "total_steps": 37885, "loss": 0.0457, "lr": 4.24370735768108e-07, "epoch": 3.6287448858387226, "percentage": 72.57, "elapsed_time": "0:40:26", "remaining_time": "0:15:17", "throughput": 5579.17, "total_tokens": 13539584} +{"current_steps": 27500, "total_steps": 37885, "loss": 0.0004, "lr": 4.23994079036344e-07, "epoch": 3.629404777616471, "percentage": 72.59, "elapsed_time": "0:40:27", "remaining_time": "0:15:16", "throughput": 5579.47, "total_tokens": 13542144} +{"current_steps": 27505, "total_steps": 37885, "loss": 0.0005, "lr": 4.2361754455766517e-07, "epoch": 3.630064669394219, "percentage": 72.6, "elapsed_time": "0:40:27", "remaining_time": "0:15:16", "throughput": 5579.72, "total_tokens": 13544576} +{"current_steps": 27510, "total_steps": 37885, "loss": 0.0, "lr": 4.232411324119888e-07, "epoch": 3.630724561171968, "percentage": 72.61, "elapsed_time": "0:40:27", "remaining_time": "0:15:15", "throughput": 5579.91, "total_tokens": 13546880} +{"current_steps": 27515, "total_steps": 37885, "loss": 0.0736, "lr": 4.228648426792054e-07, "epoch": 3.631384452949716, "percentage": 72.63, "elapsed_time": "0:40:28", "remaining_time": "0:15:15", "throughput": 5580.21, "total_tokens": 13549440} +{"current_steps": 27520, "total_steps": 37885, "loss": 0.0, "lr": 4.224886754391803e-07, "epoch": 3.632044344727465, "percentage": 72.64, "elapsed_time": "0:40:28", "remaining_time": "0:15:14", "throughput": 5580.5, "total_tokens": 13552000} +{"current_steps": 27525, "total_steps": 37885, "loss": 0.001, "lr": 4.2211263077175144e-07, "epoch": 3.632704236505213, "percentage": 72.65, "elapsed_time": "0:40:28", "remaining_time": "0:15:14", "throughput": 5580.84, "total_tokens": 13554688} +{"current_steps": 27530, "total_steps": 37885, "loss": 0.0611, "lr": 4.2173670875673197e-07, "epoch": 3.6333641282829614, "percentage": 72.67, "elapsed_time": "0:40:29", "remaining_time": "0:15:13", "throughput": 5581.25, "total_tokens": 13557568} +{"current_steps": 27535, "total_steps": 37885, "loss": 0.0, "lr": 4.213609094739089e-07, "epoch": 3.63402402006071, "percentage": 72.68, "elapsed_time": "0:40:29", "remaining_time": "0:15:13", "throughput": 5581.54, "total_tokens": 13560128} +{"current_steps": 27540, "total_steps": 37885, "loss": 0.0005, "lr": 4.2098523300304236e-07, "epoch": 3.6346839118384584, "percentage": 72.69, "elapsed_time": "0:40:29", "remaining_time": "0:15:12", "throughput": 5581.8, "total_tokens": 13562560} +{"current_steps": 27545, "total_steps": 37885, "loss": 0.0001, "lr": 4.2060967942386715e-07, "epoch": 3.635343803616207, "percentage": 72.71, "elapsed_time": "0:40:30", "remaining_time": "0:15:12", "throughput": 5582.02, "total_tokens": 13564928} +{"current_steps": 27550, "total_steps": 37885, "loss": 0.0001, "lr": 4.2023424881609195e-07, "epoch": 3.6360036953939554, "percentage": 72.72, "elapsed_time": "0:40:30", "remaining_time": "0:15:11", "throughput": 5582.27, "total_tokens": 13567360} +{"current_steps": 27555, "total_steps": 37885, "loss": 0.0002, "lr": 4.1985894125939947e-07, "epoch": 3.6366635871717037, "percentage": 72.73, "elapsed_time": "0:40:30", "remaining_time": "0:15:11", "throughput": 5582.57, "total_tokens": 13569920} +{"current_steps": 27560, "total_steps": 37885, "loss": 0.0065, "lr": 4.194837568334452e-07, "epoch": 3.6373234789494524, "percentage": 72.75, "elapsed_time": "0:40:31", "remaining_time": "0:15:10", "throughput": 5582.79, "total_tokens": 13572288} +{"current_steps": 27565, "total_steps": 37885, "loss": 0.0, "lr": 4.191086956178598e-07, "epoch": 3.6379833707272007, "percentage": 72.76, "elapsed_time": "0:40:31", "remaining_time": "0:15:10", "throughput": 5583.04, "total_tokens": 13574720} +{"current_steps": 27570, "total_steps": 37885, "loss": 0.0, "lr": 4.187337576922476e-07, "epoch": 3.6386432625049494, "percentage": 72.77, "elapsed_time": "0:40:31", "remaining_time": "0:15:09", "throughput": 5583.28, "total_tokens": 13577152} +{"current_steps": 27575, "total_steps": 37885, "loss": 0.0028, "lr": 4.1835894313618593e-07, "epoch": 3.6393031542826977, "percentage": 72.79, "elapsed_time": "0:40:32", "remaining_time": "0:15:09", "throughput": 5583.53, "total_tokens": 13579584} +{"current_steps": 27580, "total_steps": 37885, "loss": 0.0, "lr": 4.179842520292265e-07, "epoch": 3.639963046060446, "percentage": 72.8, "elapsed_time": "0:40:32", "remaining_time": "0:15:08", "throughput": 5583.78, "total_tokens": 13582016} +{"current_steps": 27585, "total_steps": 37885, "loss": 0.0, "lr": 4.176096844508954e-07, "epoch": 3.6406229378381947, "percentage": 72.81, "elapsed_time": "0:40:32", "remaining_time": "0:15:08", "throughput": 5583.93, "total_tokens": 13584192} +{"current_steps": 27590, "total_steps": 37885, "loss": 0.0002, "lr": 4.17235240480691e-07, "epoch": 3.641282829615943, "percentage": 72.83, "elapsed_time": "0:40:33", "remaining_time": "0:15:07", "throughput": 5584.18, "total_tokens": 13586624} +{"current_steps": 27595, "total_steps": 37885, "loss": 0.0016, "lr": 4.1686092019808685e-07, "epoch": 3.6419427213936917, "percentage": 72.84, "elapsed_time": "0:40:33", "remaining_time": "0:15:07", "throughput": 5584.35, "total_tokens": 13588864} +{"current_steps": 27600, "total_steps": 37885, "loss": 0.0487, "lr": 4.164867236825296e-07, "epoch": 3.64260261317144, "percentage": 72.85, "elapsed_time": "0:40:33", "remaining_time": "0:15:06", "throughput": 5584.69, "total_tokens": 13591552} +{"current_steps": 27605, "total_steps": 37885, "loss": 0.028, "lr": 4.1611265101344005e-07, "epoch": 3.6432625049491882, "percentage": 72.87, "elapsed_time": "0:40:34", "remaining_time": "0:15:06", "throughput": 5584.91, "total_tokens": 13593920} +{"current_steps": 27610, "total_steps": 37885, "loss": 0.066, "lr": 4.1573870227021224e-07, "epoch": 3.6439223967269365, "percentage": 72.88, "elapsed_time": "0:40:34", "remaining_time": "0:15:05", "throughput": 5585.13, "total_tokens": 13596288} +{"current_steps": 27615, "total_steps": 37885, "loss": 0.0, "lr": 4.153648775322132e-07, "epoch": 3.6445822885046852, "percentage": 72.89, "elapsed_time": "0:40:34", "remaining_time": "0:15:05", "throughput": 5585.28, "total_tokens": 13598464} +{"current_steps": 27620, "total_steps": 37885, "loss": 0.0014, "lr": 4.1499117687878606e-07, "epoch": 3.6452421802824335, "percentage": 72.9, "elapsed_time": "0:40:35", "remaining_time": "0:15:04", "throughput": 5585.46, "total_tokens": 13600704} +{"current_steps": 27625, "total_steps": 37885, "loss": 0.0487, "lr": 4.1461760038924496e-07, "epoch": 3.6459020720601822, "percentage": 72.92, "elapsed_time": "0:40:35", "remaining_time": "0:15:04", "throughput": 5585.71, "total_tokens": 13603136} +{"current_steps": 27630, "total_steps": 37885, "loss": 0.0, "lr": 4.142441481428792e-07, "epoch": 3.6465619638379305, "percentage": 72.93, "elapsed_time": "0:40:35", "remaining_time": "0:15:04", "throughput": 5585.89, "total_tokens": 13605440} +{"current_steps": 27635, "total_steps": 37885, "loss": 0.0, "lr": 4.138708202189516e-07, "epoch": 3.647221855615679, "percentage": 72.94, "elapsed_time": "0:40:36", "remaining_time": "0:15:03", "throughput": 5586.09, "total_tokens": 13607744} +{"current_steps": 27640, "total_steps": 37885, "loss": 0.0, "lr": 4.134976166966977e-07, "epoch": 3.6478817473934275, "percentage": 72.96, "elapsed_time": "0:40:36", "remaining_time": "0:15:03", "throughput": 5586.36, "total_tokens": 13610240} +{"current_steps": 27645, "total_steps": 37885, "loss": 0.0754, "lr": 4.131245376553278e-07, "epoch": 3.648541639171176, "percentage": 72.97, "elapsed_time": "0:40:36", "remaining_time": "0:15:02", "throughput": 5586.53, "total_tokens": 13612480} +{"current_steps": 27650, "total_steps": 37885, "loss": 0.0028, "lr": 4.1275158317402436e-07, "epoch": 3.6492015309489245, "percentage": 72.98, "elapsed_time": "0:40:36", "remaining_time": "0:15:02", "throughput": 5586.85, "total_tokens": 13615104} +{"current_steps": 27655, "total_steps": 37885, "loss": 0.0, "lr": 4.123787533319455e-07, "epoch": 3.649861422726673, "percentage": 73.0, "elapsed_time": "0:40:37", "remaining_time": "0:15:01", "throughput": 5587.09, "total_tokens": 13617536} +{"current_steps": 27660, "total_steps": 37885, "loss": 0.0018, "lr": 4.1200604820822103e-07, "epoch": 3.650521314504421, "percentage": 73.01, "elapsed_time": "0:40:37", "remaining_time": "0:15:01", "throughput": 5587.31, "total_tokens": 13619904} +{"current_steps": 27665, "total_steps": 37885, "loss": 0.0, "lr": 4.1163346788195465e-07, "epoch": 3.65118120628217, "percentage": 73.02, "elapsed_time": "0:40:37", "remaining_time": "0:15:00", "throughput": 5587.6, "total_tokens": 13622464} +{"current_steps": 27670, "total_steps": 37885, "loss": 0.0018, "lr": 4.11261012432224e-07, "epoch": 3.651841098059918, "percentage": 73.04, "elapsed_time": "0:40:38", "remaining_time": "0:15:00", "throughput": 5587.94, "total_tokens": 13625152} +{"current_steps": 27675, "total_steps": 37885, "loss": 0.0004, "lr": 4.1088868193808023e-07, "epoch": 3.652500989837667, "percentage": 73.05, "elapsed_time": "0:40:38", "remaining_time": "0:14:59", "throughput": 5588.22, "total_tokens": 13627712} +{"current_steps": 27680, "total_steps": 37885, "loss": 0.0, "lr": 4.10516476478548e-07, "epoch": 3.653160881615415, "percentage": 73.06, "elapsed_time": "0:40:38", "remaining_time": "0:14:59", "throughput": 5588.39, "total_tokens": 13629952} +{"current_steps": 27685, "total_steps": 37885, "loss": 0.0688, "lr": 4.101443961326245e-07, "epoch": 3.6538207733931634, "percentage": 73.08, "elapsed_time": "0:40:39", "remaining_time": "0:14:58", "throughput": 5588.72, "total_tokens": 13632576} +{"current_steps": 27690, "total_steps": 37885, "loss": 0.0, "lr": 4.0977244097928164e-07, "epoch": 3.654480665170912, "percentage": 73.09, "elapsed_time": "0:40:39", "remaining_time": "0:14:58", "throughput": 5588.93, "total_tokens": 13634944} +{"current_steps": 27695, "total_steps": 37885, "loss": 0.0, "lr": 4.094006110974645e-07, "epoch": 3.6551405569486604, "percentage": 73.1, "elapsed_time": "0:40:39", "remaining_time": "0:14:57", "throughput": 5589.13, "total_tokens": 13637248} +{"current_steps": 27700, "total_steps": 37885, "loss": 0.0001, "lr": 4.0902890656609044e-07, "epoch": 3.655800448726409, "percentage": 73.12, "elapsed_time": "0:40:40", "remaining_time": "0:14:57", "throughput": 5589.39, "total_tokens": 13639744} +{"current_steps": 27705, "total_steps": 37885, "loss": 0.1378, "lr": 4.0865732746405145e-07, "epoch": 3.6564603405041574, "percentage": 73.13, "elapsed_time": "0:40:40", "remaining_time": "0:14:56", "throughput": 5589.66, "total_tokens": 13642240} +{"current_steps": 27710, "total_steps": 37885, "loss": 0.0, "lr": 4.08285873870213e-07, "epoch": 3.6571202322819056, "percentage": 73.14, "elapsed_time": "0:40:40", "remaining_time": "0:14:56", "throughput": 5589.91, "total_tokens": 13644672} +{"current_steps": 27715, "total_steps": 37885, "loss": 0.0004, "lr": 4.079145458634125e-07, "epoch": 3.6577801240596544, "percentage": 73.16, "elapsed_time": "0:40:41", "remaining_time": "0:14:55", "throughput": 5590.13, "total_tokens": 13647040} +{"current_steps": 27720, "total_steps": 37885, "loss": 0.0003, "lr": 4.075433435224621e-07, "epoch": 3.6584400158374026, "percentage": 73.17, "elapsed_time": "0:40:41", "remaining_time": "0:14:55", "throughput": 5590.43, "total_tokens": 13649600} +{"current_steps": 27725, "total_steps": 37885, "loss": 0.0072, "lr": 4.071722669261468e-07, "epoch": 3.6590999076151514, "percentage": 73.18, "elapsed_time": "0:40:41", "remaining_time": "0:14:54", "throughput": 5590.79, "total_tokens": 13652352} +{"current_steps": 27730, "total_steps": 37885, "loss": 0.0, "lr": 4.068013161532253e-07, "epoch": 3.6597597993928996, "percentage": 73.2, "elapsed_time": "0:40:42", "remaining_time": "0:14:54", "throughput": 5591.11, "total_tokens": 13654976} +{"current_steps": 27735, "total_steps": 37885, "loss": 0.0007, "lr": 4.064304912824286e-07, "epoch": 3.660419691170648, "percentage": 73.21, "elapsed_time": "0:40:42", "remaining_time": "0:14:53", "throughput": 5591.35, "total_tokens": 13657408} +{"current_steps": 27740, "total_steps": 37885, "loss": 0.0004, "lr": 4.0605979239246166e-07, "epoch": 3.661079582948396, "percentage": 73.22, "elapsed_time": "0:40:42", "remaining_time": "0:14:53", "throughput": 5591.42, "total_tokens": 13659392} +{"current_steps": 27745, "total_steps": 37885, "loss": 0.0, "lr": 4.056892195620032e-07, "epoch": 3.661739474726145, "percentage": 73.23, "elapsed_time": "0:40:43", "remaining_time": "0:14:52", "throughput": 5591.67, "total_tokens": 13661824} +{"current_steps": 27750, "total_steps": 37885, "loss": 0.0, "lr": 4.0531877286970397e-07, "epoch": 3.6623993665038936, "percentage": 73.25, "elapsed_time": "0:40:43", "remaining_time": "0:14:52", "throughput": 5591.96, "total_tokens": 13664384} +{"current_steps": 27755, "total_steps": 37885, "loss": 0.0001, "lr": 4.0494845239418873e-07, "epoch": 3.663059258281642, "percentage": 73.26, "elapsed_time": "0:40:43", "remaining_time": "0:14:51", "throughput": 5592.16, "total_tokens": 13666688} +{"current_steps": 27760, "total_steps": 37885, "loss": 0.0844, "lr": 4.045782582140559e-07, "epoch": 3.66371915005939, "percentage": 73.27, "elapsed_time": "0:40:44", "remaining_time": "0:14:51", "throughput": 5592.38, "total_tokens": 13669056} +{"current_steps": 27765, "total_steps": 37885, "loss": 0.1125, "lr": 4.042081904078757e-07, "epoch": 3.6643790418371385, "percentage": 73.29, "elapsed_time": "0:40:44", "remaining_time": "0:14:51", "throughput": 5592.7, "total_tokens": 13671680} +{"current_steps": 27770, "total_steps": 37885, "loss": 0.0, "lr": 4.0383824905419263e-07, "epoch": 3.665038933614887, "percentage": 73.3, "elapsed_time": "0:40:44", "remaining_time": "0:14:50", "throughput": 5592.99, "total_tokens": 13674240} +{"current_steps": 27775, "total_steps": 37885, "loss": 0.0549, "lr": 4.034684342315241e-07, "epoch": 3.6656988253926355, "percentage": 73.31, "elapsed_time": "0:40:45", "remaining_time": "0:14:50", "throughput": 5593.23, "total_tokens": 13676672} +{"current_steps": 27780, "total_steps": 37885, "loss": 0.0, "lr": 4.0309874601836114e-07, "epoch": 3.666358717170384, "percentage": 73.33, "elapsed_time": "0:40:45", "remaining_time": "0:14:49", "throughput": 5593.43, "total_tokens": 13678976} +{"current_steps": 27785, "total_steps": 37885, "loss": 0.0626, "lr": 4.0272918449316684e-07, "epoch": 3.6670186089481325, "percentage": 73.34, "elapsed_time": "0:40:45", "remaining_time": "0:14:49", "throughput": 5593.64, "total_tokens": 13681344} +{"current_steps": 27790, "total_steps": 37885, "loss": 0.0, "lr": 4.0235974973437735e-07, "epoch": 3.6676785007258808, "percentage": 73.35, "elapsed_time": "0:40:46", "remaining_time": "0:14:48", "throughput": 5593.77, "total_tokens": 13683456} +{"current_steps": 27795, "total_steps": 37885, "loss": 0.0, "lr": 4.0199044182040385e-07, "epoch": 3.6683383925036295, "percentage": 73.37, "elapsed_time": "0:40:46", "remaining_time": "0:14:48", "throughput": 5594.15, "total_tokens": 13686272} +{"current_steps": 27800, "total_steps": 37885, "loss": 0.1113, "lr": 4.016212608296284e-07, "epoch": 3.6689982842813778, "percentage": 73.38, "elapsed_time": "0:40:46", "remaining_time": "0:14:47", "throughput": 5594.47, "total_tokens": 13688896} +{"current_steps": 27805, "total_steps": 37885, "loss": 0.0472, "lr": 4.012522068404075e-07, "epoch": 3.6696581760591265, "percentage": 73.39, "elapsed_time": "0:40:47", "remaining_time": "0:14:47", "throughput": 5594.67, "total_tokens": 13691200} +{"current_steps": 27810, "total_steps": 37885, "loss": 0.0, "lr": 4.0088327993106964e-07, "epoch": 3.6703180678368748, "percentage": 73.41, "elapsed_time": "0:40:47", "remaining_time": "0:14:46", "throughput": 5595.0, "total_tokens": 13693888} +{"current_steps": 27815, "total_steps": 37885, "loss": 0.0308, "lr": 4.005144801799171e-07, "epoch": 3.670977959614623, "percentage": 73.42, "elapsed_time": "0:40:47", "remaining_time": "0:14:46", "throughput": 5595.22, "total_tokens": 13696256} +{"current_steps": 27820, "total_steps": 37885, "loss": 0.1735, "lr": 4.001458076652253e-07, "epoch": 3.6716378513923718, "percentage": 73.43, "elapsed_time": "0:40:48", "remaining_time": "0:14:45", "throughput": 5595.49, "total_tokens": 13698752} +{"current_steps": 27825, "total_steps": 37885, "loss": 0.0006, "lr": 3.9977726246524133e-07, "epoch": 3.67229774317012, "percentage": 73.45, "elapsed_time": "0:40:48", "remaining_time": "0:14:45", "throughput": 5595.8, "total_tokens": 13701376} +{"current_steps": 27830, "total_steps": 37885, "loss": 0.0, "lr": 3.994088446581877e-07, "epoch": 3.6729576349478688, "percentage": 73.46, "elapsed_time": "0:40:48", "remaining_time": "0:14:44", "throughput": 5596.04, "total_tokens": 13703808} +{"current_steps": 27835, "total_steps": 37885, "loss": 0.0, "lr": 3.990405543222576e-07, "epoch": 3.673617526725617, "percentage": 73.47, "elapsed_time": "0:40:49", "remaining_time": "0:14:44", "throughput": 5596.25, "total_tokens": 13706176} +{"current_steps": 27840, "total_steps": 37885, "loss": 0.0079, "lr": 3.9867239153561774e-07, "epoch": 3.6742774185033653, "percentage": 73.49, "elapsed_time": "0:40:49", "remaining_time": "0:14:43", "throughput": 5596.45, "total_tokens": 13708480} +{"current_steps": 27845, "total_steps": 37885, "loss": 0.0003, "lr": 3.9830435637640825e-07, "epoch": 3.674937310281114, "percentage": 73.5, "elapsed_time": "0:40:49", "remaining_time": "0:14:43", "throughput": 5596.66, "total_tokens": 13710848} +{"current_steps": 27850, "total_steps": 37885, "loss": 0.0, "lr": 3.979364489227419e-07, "epoch": 3.6755972020588623, "percentage": 73.51, "elapsed_time": "0:40:50", "remaining_time": "0:14:42", "throughput": 5596.8, "total_tokens": 13713024} +{"current_steps": 27855, "total_steps": 37885, "loss": 0.0005, "lr": 3.9756866925270494e-07, "epoch": 3.676257093836611, "percentage": 73.53, "elapsed_time": "0:40:50", "remaining_time": "0:14:42", "throughput": 5597.17, "total_tokens": 13715776} +{"current_steps": 27860, "total_steps": 37885, "loss": 0.0004, "lr": 3.972010174443551e-07, "epoch": 3.6769169856143593, "percentage": 73.54, "elapsed_time": "0:40:50", "remaining_time": "0:14:41", "throughput": 5597.46, "total_tokens": 13718336} +{"current_steps": 27865, "total_steps": 37885, "loss": 0.0, "lr": 3.9683349357572417e-07, "epoch": 3.6775768773921076, "percentage": 73.55, "elapsed_time": "0:40:51", "remaining_time": "0:14:41", "throughput": 5597.75, "total_tokens": 13720896} +{"current_steps": 27870, "total_steps": 37885, "loss": 0.0, "lr": 3.9646609772481677e-07, "epoch": 3.678236769169856, "percentage": 73.56, "elapsed_time": "0:40:51", "remaining_time": "0:14:40", "throughput": 5597.91, "total_tokens": 13723136} +{"current_steps": 27875, "total_steps": 37885, "loss": 0.0005, "lr": 3.960988299696094e-07, "epoch": 3.6788966609476046, "percentage": 73.58, "elapsed_time": "0:40:51", "remaining_time": "0:14:40", "throughput": 5598.16, "total_tokens": 13725568} +{"current_steps": 27880, "total_steps": 37885, "loss": 0.0, "lr": 3.957316903880522e-07, "epoch": 3.6795565527253533, "percentage": 73.59, "elapsed_time": "0:40:52", "remaining_time": "0:14:39", "throughput": 5598.37, "total_tokens": 13727936} +{"current_steps": 27885, "total_steps": 37885, "loss": 0.0, "lr": 3.953646790580679e-07, "epoch": 3.6802164445031016, "percentage": 73.6, "elapsed_time": "0:40:52", "remaining_time": "0:14:39", "throughput": 5598.56, "total_tokens": 13730240} +{"current_steps": 27890, "total_steps": 37885, "loss": 0.0001, "lr": 3.949977960575525e-07, "epoch": 3.68087633628085, "percentage": 73.62, "elapsed_time": "0:40:52", "remaining_time": "0:14:39", "throughput": 5598.89, "total_tokens": 13732928} +{"current_steps": 27895, "total_steps": 37885, "loss": 0.0109, "lr": 3.946310414643734e-07, "epoch": 3.681536228058598, "percentage": 73.63, "elapsed_time": "0:40:53", "remaining_time": "0:14:38", "throughput": 5599.23, "total_tokens": 13735616} +{"current_steps": 27900, "total_steps": 37885, "loss": 0.0487, "lr": 3.94264415356372e-07, "epoch": 3.682196119836347, "percentage": 73.64, "elapsed_time": "0:40:53", "remaining_time": "0:14:38", "throughput": 5599.48, "total_tokens": 13738048} +{"current_steps": 27905, "total_steps": 37885, "loss": 0.121, "lr": 3.938979178113625e-07, "epoch": 3.682856011614095, "percentage": 73.66, "elapsed_time": "0:40:53", "remaining_time": "0:14:37", "throughput": 5599.74, "total_tokens": 13740544} +{"current_steps": 27910, "total_steps": 37885, "loss": 0.0005, "lr": 3.9353154890713037e-07, "epoch": 3.683515903391844, "percentage": 73.67, "elapsed_time": "0:40:54", "remaining_time": "0:14:37", "throughput": 5600.05, "total_tokens": 13743168} +{"current_steps": 27915, "total_steps": 37885, "loss": 0.0281, "lr": 3.9316530872143537e-07, "epoch": 3.684175795169592, "percentage": 73.68, "elapsed_time": "0:40:54", "remaining_time": "0:14:36", "throughput": 5600.22, "total_tokens": 13745408} +{"current_steps": 27920, "total_steps": 37885, "loss": 0.0337, "lr": 3.927991973320096e-07, "epoch": 3.6848356869473404, "percentage": 73.7, "elapsed_time": "0:40:54", "remaining_time": "0:14:36", "throughput": 5600.49, "total_tokens": 13747904} +{"current_steps": 27925, "total_steps": 37885, "loss": 0.0579, "lr": 3.924332148165569e-07, "epoch": 3.685495578725089, "percentage": 73.71, "elapsed_time": "0:40:55", "remaining_time": "0:14:35", "throughput": 5600.75, "total_tokens": 13750400} +{"current_steps": 27930, "total_steps": 37885, "loss": 0.0, "lr": 3.9206736125275463e-07, "epoch": 3.6861554705028374, "percentage": 73.72, "elapsed_time": "0:40:55", "remaining_time": "0:14:35", "throughput": 5601.06, "total_tokens": 13753024} +{"current_steps": 27935, "total_steps": 37885, "loss": 0.0, "lr": 3.9170163671825265e-07, "epoch": 3.686815362280586, "percentage": 73.74, "elapsed_time": "0:40:55", "remaining_time": "0:14:34", "throughput": 5601.32, "total_tokens": 13755520} +{"current_steps": 27940, "total_steps": 37885, "loss": 0.001, "lr": 3.9133604129067364e-07, "epoch": 3.6874752540583344, "percentage": 73.75, "elapsed_time": "0:40:56", "remaining_time": "0:14:34", "throughput": 5601.7, "total_tokens": 13758336} +{"current_steps": 27945, "total_steps": 37885, "loss": 0.0, "lr": 3.9097057504761234e-07, "epoch": 3.6881351458360827, "percentage": 73.76, "elapsed_time": "0:40:56", "remaining_time": "0:14:33", "throughput": 5602.02, "total_tokens": 13760960} +{"current_steps": 27950, "total_steps": 37885, "loss": 0.0, "lr": 3.9060523806663556e-07, "epoch": 3.6887950376138314, "percentage": 73.78, "elapsed_time": "0:40:56", "remaining_time": "0:14:33", "throughput": 5602.3, "total_tokens": 13763520} +{"current_steps": 27955, "total_steps": 37885, "loss": 0.0, "lr": 3.9024003042528474e-07, "epoch": 3.6894549293915797, "percentage": 73.79, "elapsed_time": "0:40:57", "remaining_time": "0:14:32", "throughput": 5602.62, "total_tokens": 13766144} +{"current_steps": 27960, "total_steps": 37885, "loss": 0.0, "lr": 3.898749522010716e-07, "epoch": 3.6901148211693284, "percentage": 73.8, "elapsed_time": "0:40:57", "remaining_time": "0:14:32", "throughput": 5602.93, "total_tokens": 13768768} +{"current_steps": 27965, "total_steps": 37885, "loss": 0.0674, "lr": 3.895100034714817e-07, "epoch": 3.6907747129470767, "percentage": 73.82, "elapsed_time": "0:40:57", "remaining_time": "0:14:31", "throughput": 5603.19, "total_tokens": 13771264} +{"current_steps": 27970, "total_steps": 37885, "loss": 0.0, "lr": 3.8914518431397305e-07, "epoch": 3.691434604724825, "percentage": 73.83, "elapsed_time": "0:40:58", "remaining_time": "0:14:31", "throughput": 5603.33, "total_tokens": 13773440} +{"current_steps": 27975, "total_steps": 37885, "loss": 0.0007, "lr": 3.887804948059752e-07, "epoch": 3.6920944965025737, "percentage": 73.84, "elapsed_time": "0:40:58", "remaining_time": "0:14:30", "throughput": 5603.58, "total_tokens": 13775872} +{"current_steps": 27980, "total_steps": 37885, "loss": 0.0, "lr": 3.8841593502489155e-07, "epoch": 3.692754388280322, "percentage": 73.86, "elapsed_time": "0:40:58", "remaining_time": "0:14:30", "throughput": 5603.75, "total_tokens": 13778112} +{"current_steps": 27985, "total_steps": 37885, "loss": 0.0, "lr": 3.880515050480964e-07, "epoch": 3.6934142800580707, "percentage": 73.87, "elapsed_time": "0:40:59", "remaining_time": "0:14:29", "throughput": 5603.99, "total_tokens": 13780544} +{"current_steps": 27990, "total_steps": 37885, "loss": 0.0012, "lr": 3.876872049529385e-07, "epoch": 3.694074171835819, "percentage": 73.88, "elapsed_time": "0:40:59", "remaining_time": "0:14:29", "throughput": 5604.23, "total_tokens": 13782976} +{"current_steps": 27995, "total_steps": 37885, "loss": 0.0, "lr": 3.8732303481673733e-07, "epoch": 3.6947340636135673, "percentage": 73.89, "elapsed_time": "0:40:59", "remaining_time": "0:14:28", "throughput": 5604.49, "total_tokens": 13785472} +{"current_steps": 28000, "total_steps": 37885, "loss": 0.0673, "lr": 3.869589947167851e-07, "epoch": 3.695393955391316, "percentage": 73.91, "elapsed_time": "0:41:00", "remaining_time": "0:14:28", "throughput": 5604.78, "total_tokens": 13788032} +{"current_steps": 28005, "total_steps": 37885, "loss": 0.0, "lr": 3.8659508473034684e-07, "epoch": 3.6960538471690643, "percentage": 73.92, "elapsed_time": "0:41:00", "remaining_time": "0:14:28", "throughput": 5605.05, "total_tokens": 13790528} +{"current_steps": 28010, "total_steps": 37885, "loss": 0.0029, "lr": 3.8623130493465994e-07, "epoch": 3.696713738946813, "percentage": 73.93, "elapsed_time": "0:41:00", "remaining_time": "0:14:27", "throughput": 5605.38, "total_tokens": 13793216} +{"current_steps": 28015, "total_steps": 37885, "loss": 0.0001, "lr": 3.8586765540693434e-07, "epoch": 3.6973736307245613, "percentage": 73.95, "elapsed_time": "0:41:01", "remaining_time": "0:14:27", "throughput": 5605.6, "total_tokens": 13795584} +{"current_steps": 28020, "total_steps": 37885, "loss": 0.0, "lr": 3.855041362243514e-07, "epoch": 3.6980335225023095, "percentage": 73.96, "elapsed_time": "0:41:01", "remaining_time": "0:14:26", "throughput": 5605.82, "total_tokens": 13797952} +{"current_steps": 28025, "total_steps": 37885, "loss": 0.0, "lr": 3.8514074746406566e-07, "epoch": 3.698693414280058, "percentage": 73.97, "elapsed_time": "0:41:01", "remaining_time": "0:14:26", "throughput": 5606.13, "total_tokens": 13800576} +{"current_steps": 28030, "total_steps": 37885, "loss": 0.0, "lr": 3.847774892032042e-07, "epoch": 3.6993533060578065, "percentage": 73.99, "elapsed_time": "0:41:02", "remaining_time": "0:14:25", "throughput": 5606.42, "total_tokens": 13803136} +{"current_steps": 28035, "total_steps": 37885, "loss": 0.0004, "lr": 3.844143615188652e-07, "epoch": 3.700013197835555, "percentage": 74.0, "elapsed_time": "0:41:02", "remaining_time": "0:14:25", "throughput": 5606.54, "total_tokens": 13805248} +{"current_steps": 28040, "total_steps": 37885, "loss": 0.0, "lr": 3.8405136448812023e-07, "epoch": 3.7006730896133035, "percentage": 74.01, "elapsed_time": "0:41:02", "remaining_time": "0:14:24", "throughput": 5606.69, "total_tokens": 13807424} +{"current_steps": 28045, "total_steps": 37885, "loss": 0.058, "lr": 3.8368849818801317e-07, "epoch": 3.701332981391052, "percentage": 74.03, "elapsed_time": "0:41:03", "remaining_time": "0:14:24", "throughput": 5607.09, "total_tokens": 13810304} +{"current_steps": 28050, "total_steps": 37885, "loss": 0.0, "lr": 3.8332576269555906e-07, "epoch": 3.7019928731688, "percentage": 74.04, "elapsed_time": "0:41:03", "remaining_time": "0:14:23", "throughput": 5607.25, "total_tokens": 13812544} +{"current_steps": 28055, "total_steps": 37885, "loss": 0.0, "lr": 3.8296315808774616e-07, "epoch": 3.702652764946549, "percentage": 74.05, "elapsed_time": "0:41:03", "remaining_time": "0:14:23", "throughput": 5607.52, "total_tokens": 13815040} +{"current_steps": 28060, "total_steps": 37885, "loss": 0.0, "lr": 3.826006844415347e-07, "epoch": 3.703312656724297, "percentage": 74.07, "elapsed_time": "0:41:03", "remaining_time": "0:14:22", "throughput": 5607.78, "total_tokens": 13817536} +{"current_steps": 28065, "total_steps": 37885, "loss": 0.0, "lr": 3.822383418338576e-07, "epoch": 3.703972548502046, "percentage": 74.08, "elapsed_time": "0:41:04", "remaining_time": "0:14:22", "throughput": 5607.96, "total_tokens": 13819840} +{"current_steps": 28070, "total_steps": 37885, "loss": 0.0, "lr": 3.8187613034161847e-07, "epoch": 3.704632440279794, "percentage": 74.09, "elapsed_time": "0:41:04", "remaining_time": "0:14:21", "throughput": 5608.18, "total_tokens": 13822208} +{"current_steps": 28075, "total_steps": 37885, "loss": 0.0001, "lr": 3.815140500416947e-07, "epoch": 3.7052923320575424, "percentage": 74.11, "elapsed_time": "0:41:04", "remaining_time": "0:14:21", "throughput": 5608.37, "total_tokens": 13824512} +{"current_steps": 28080, "total_steps": 37885, "loss": 0.0008, "lr": 3.811521010109353e-07, "epoch": 3.705952223835291, "percentage": 74.12, "elapsed_time": "0:41:05", "remaining_time": "0:14:20", "throughput": 5608.56, "total_tokens": 13826816} +{"current_steps": 28085, "total_steps": 37885, "loss": 0.0, "lr": 3.807902833261609e-07, "epoch": 3.7066121156130394, "percentage": 74.13, "elapsed_time": "0:41:05", "remaining_time": "0:14:20", "throughput": 5608.82, "total_tokens": 13829312} +{"current_steps": 28090, "total_steps": 37885, "loss": 0.0518, "lr": 3.804285970641649e-07, "epoch": 3.707272007390788, "percentage": 74.15, "elapsed_time": "0:41:05", "remaining_time": "0:14:19", "throughput": 5609.04, "total_tokens": 13831680} +{"current_steps": 28095, "total_steps": 37885, "loss": 0.0533, "lr": 3.800670423017128e-07, "epoch": 3.7079318991685364, "percentage": 74.16, "elapsed_time": "0:41:06", "remaining_time": "0:14:19", "throughput": 5609.33, "total_tokens": 13834240} +{"current_steps": 28100, "total_steps": 37885, "loss": 0.0718, "lr": 3.7970561911554143e-07, "epoch": 3.7085917909462847, "percentage": 74.17, "elapsed_time": "0:41:06", "remaining_time": "0:14:18", "throughput": 5609.44, "total_tokens": 13836352} +{"current_steps": 28105, "total_steps": 37885, "loss": 0.0, "lr": 3.793443275823607e-07, "epoch": 3.7092516827240334, "percentage": 74.19, "elapsed_time": "0:41:06", "remaining_time": "0:14:18", "throughput": 5609.75, "total_tokens": 13838976} +{"current_steps": 28110, "total_steps": 37885, "loss": 0.0011, "lr": 3.7898316777885195e-07, "epoch": 3.7099115745017817, "percentage": 74.2, "elapsed_time": "0:41:07", "remaining_time": "0:14:17", "throughput": 5610.01, "total_tokens": 13841472} +{"current_steps": 28115, "total_steps": 37885, "loss": 0.0003, "lr": 3.786221397816691e-07, "epoch": 3.7105714662795304, "percentage": 74.21, "elapsed_time": "0:41:07", "remaining_time": "0:14:17", "throughput": 5610.18, "total_tokens": 13843712} +{"current_steps": 28120, "total_steps": 37885, "loss": 0.0487, "lr": 3.782612436674375e-07, "epoch": 3.7112313580572787, "percentage": 74.22, "elapsed_time": "0:41:07", "remaining_time": "0:14:17", "throughput": 5610.43, "total_tokens": 13846208} +{"current_steps": 28125, "total_steps": 37885, "loss": 0.0401, "lr": 3.7790047951275394e-07, "epoch": 3.711891249835027, "percentage": 74.24, "elapsed_time": "0:41:08", "remaining_time": "0:14:16", "throughput": 5610.6, "total_tokens": 13848448} +{"current_steps": 28130, "total_steps": 37885, "loss": 0.0001, "lr": 3.7753984739418945e-07, "epoch": 3.7125511416127757, "percentage": 74.25, "elapsed_time": "0:41:08", "remaining_time": "0:14:16", "throughput": 5610.84, "total_tokens": 13850880} +{"current_steps": 28135, "total_steps": 37885, "loss": 0.0025, "lr": 3.771793473882844e-07, "epoch": 3.713211033390524, "percentage": 74.26, "elapsed_time": "0:41:08", "remaining_time": "0:14:15", "throughput": 5611.12, "total_tokens": 13853440} +{"current_steps": 28140, "total_steps": 37885, "loss": 0.0912, "lr": 3.768189795715532e-07, "epoch": 3.7138709251682727, "percentage": 74.28, "elapsed_time": "0:41:09", "remaining_time": "0:14:15", "throughput": 5611.41, "total_tokens": 13856000} +{"current_steps": 28145, "total_steps": 37885, "loss": 0.0, "lr": 3.764587440204804e-07, "epoch": 3.714530816946021, "percentage": 74.29, "elapsed_time": "0:41:09", "remaining_time": "0:14:14", "throughput": 5611.62, "total_tokens": 13858368} +{"current_steps": 28150, "total_steps": 37885, "loss": 0.0002, "lr": 3.7609864081152387e-07, "epoch": 3.715190708723769, "percentage": 74.3, "elapsed_time": "0:41:09", "remaining_time": "0:14:14", "throughput": 5611.95, "total_tokens": 13861056} +{"current_steps": 28155, "total_steps": 37885, "loss": 0.0, "lr": 3.7573867002111324e-07, "epoch": 3.7158506005015175, "percentage": 74.32, "elapsed_time": "0:41:10", "remaining_time": "0:14:13", "throughput": 5612.21, "total_tokens": 13863552} +{"current_steps": 28160, "total_steps": 37885, "loss": 0.0, "lr": 3.753788317256488e-07, "epoch": 3.716510492279266, "percentage": 74.33, "elapsed_time": "0:41:10", "remaining_time": "0:14:13", "throughput": 5612.53, "total_tokens": 13866240} +{"current_steps": 28165, "total_steps": 37885, "loss": 0.0, "lr": 3.7501912600150474e-07, "epoch": 3.7171703840570145, "percentage": 74.34, "elapsed_time": "0:41:10", "remaining_time": "0:14:12", "throughput": 5612.68, "total_tokens": 13868480} +{"current_steps": 28170, "total_steps": 37885, "loss": 0.0, "lr": 3.7465955292502505e-07, "epoch": 3.717830275834763, "percentage": 74.36, "elapsed_time": "0:41:11", "remaining_time": "0:14:12", "throughput": 5612.79, "total_tokens": 13870592} +{"current_steps": 28175, "total_steps": 37885, "loss": 0.0308, "lr": 3.7430011257252735e-07, "epoch": 3.7184901676125115, "percentage": 74.37, "elapsed_time": "0:41:11", "remaining_time": "0:14:11", "throughput": 5612.88, "total_tokens": 13872704} +{"current_steps": 28180, "total_steps": 37885, "loss": 0.0003, "lr": 3.7394080502029934e-07, "epoch": 3.7191500593902598, "percentage": 74.38, "elapsed_time": "0:41:11", "remaining_time": "0:14:11", "throughput": 5613.01, "total_tokens": 13874880} +{"current_steps": 28185, "total_steps": 37885, "loss": 0.0302, "lr": 3.73581630344602e-07, "epoch": 3.7198099511680085, "percentage": 74.4, "elapsed_time": "0:41:12", "remaining_time": "0:14:10", "throughput": 5613.2, "total_tokens": 13877248} +{"current_steps": 28190, "total_steps": 37885, "loss": 0.0, "lr": 3.732225886216678e-07, "epoch": 3.7204698429457568, "percentage": 74.41, "elapsed_time": "0:41:12", "remaining_time": "0:14:10", "throughput": 5613.44, "total_tokens": 13879744} +{"current_steps": 28195, "total_steps": 37885, "loss": 0.0001, "lr": 3.7286367992769994e-07, "epoch": 3.7211297347235055, "percentage": 74.42, "elapsed_time": "0:41:12", "remaining_time": "0:14:09", "throughput": 5613.64, "total_tokens": 13882112} +{"current_steps": 28200, "total_steps": 37885, "loss": 0.0471, "lr": 3.7250490433887473e-07, "epoch": 3.721789626501254, "percentage": 74.44, "elapsed_time": "0:41:13", "remaining_time": "0:14:09", "throughput": 5613.81, "total_tokens": 13884416} +{"current_steps": 28205, "total_steps": 37885, "loss": 0.0548, "lr": 3.7214626193133993e-07, "epoch": 3.722449518279002, "percentage": 74.45, "elapsed_time": "0:41:13", "remaining_time": "0:14:08", "throughput": 5613.96, "total_tokens": 13886656} +{"current_steps": 28210, "total_steps": 37885, "loss": 0.1484, "lr": 3.717877527812141e-07, "epoch": 3.723109410056751, "percentage": 74.46, "elapsed_time": "0:41:13", "remaining_time": "0:14:08", "throughput": 5614.18, "total_tokens": 13889088} +{"current_steps": 28215, "total_steps": 37885, "loss": 0.0813, "lr": 3.714293769645886e-07, "epoch": 3.723769301834499, "percentage": 74.48, "elapsed_time": "0:41:14", "remaining_time": "0:14:07", "throughput": 5614.37, "total_tokens": 13891456} +{"current_steps": 28220, "total_steps": 37885, "loss": 0.0005, "lr": 3.710711345575261e-07, "epoch": 3.724429193612248, "percentage": 74.49, "elapsed_time": "0:41:14", "remaining_time": "0:14:07", "throughput": 5614.64, "total_tokens": 13894016} +{"current_steps": 28225, "total_steps": 37885, "loss": 0.0, "lr": 3.707130256360614e-07, "epoch": 3.725089085389996, "percentage": 74.5, "elapsed_time": "0:41:14", "remaining_time": "0:14:07", "throughput": 5614.89, "total_tokens": 13896512} +{"current_steps": 28230, "total_steps": 37885, "loss": 0.0181, "lr": 3.7035505027619964e-07, "epoch": 3.7257489771677443, "percentage": 74.51, "elapsed_time": "0:41:15", "remaining_time": "0:14:06", "throughput": 5615.13, "total_tokens": 13899008} +{"current_steps": 28235, "total_steps": 37885, "loss": 0.0411, "lr": 3.6999720855391893e-07, "epoch": 3.726408868945493, "percentage": 74.53, "elapsed_time": "0:41:15", "remaining_time": "0:14:06", "throughput": 5615.42, "total_tokens": 13901632} +{"current_steps": 28240, "total_steps": 37885, "loss": 0.0704, "lr": 3.696395005451689e-07, "epoch": 3.7270687607232413, "percentage": 74.54, "elapsed_time": "0:41:15", "remaining_time": "0:14:05", "throughput": 5615.59, "total_tokens": 13903936} +{"current_steps": 28245, "total_steps": 37885, "loss": 0.0001, "lr": 3.6928192632586986e-07, "epoch": 3.72772865250099, "percentage": 74.55, "elapsed_time": "0:41:16", "remaining_time": "0:14:05", "throughput": 5615.81, "total_tokens": 13906368} +{"current_steps": 28250, "total_steps": 37885, "loss": 0.0881, "lr": 3.6892448597191463e-07, "epoch": 3.7283885442787383, "percentage": 74.57, "elapsed_time": "0:41:16", "remaining_time": "0:14:04", "throughput": 5616.1, "total_tokens": 13908992} +{"current_steps": 28255, "total_steps": 37885, "loss": 0.0367, "lr": 3.685671795591677e-07, "epoch": 3.7290484360564866, "percentage": 74.58, "elapsed_time": "0:41:16", "remaining_time": "0:14:04", "throughput": 5616.43, "total_tokens": 13911744} +{"current_steps": 28260, "total_steps": 37885, "loss": 0.0018, "lr": 3.682100071634642e-07, "epoch": 3.7297083278342353, "percentage": 74.59, "elapsed_time": "0:41:17", "remaining_time": "0:14:03", "throughput": 5616.68, "total_tokens": 13914240} +{"current_steps": 28265, "total_steps": 37885, "loss": 0.0003, "lr": 3.6785296886061144e-07, "epoch": 3.7303682196119836, "percentage": 74.61, "elapsed_time": "0:41:17", "remaining_time": "0:14:03", "throughput": 5617.07, "total_tokens": 13917120} +{"current_steps": 28270, "total_steps": 37885, "loss": 0.0, "lr": 3.674960647263885e-07, "epoch": 3.7310281113897323, "percentage": 74.62, "elapsed_time": "0:41:17", "remaining_time": "0:14:02", "throughput": 5617.32, "total_tokens": 13919616} +{"current_steps": 28275, "total_steps": 37885, "loss": 0.0004, "lr": 3.671392948365458e-07, "epoch": 3.7316880031674806, "percentage": 74.63, "elapsed_time": "0:41:18", "remaining_time": "0:14:02", "throughput": 5617.72, "total_tokens": 13922560} +{"current_steps": 28280, "total_steps": 37885, "loss": 0.0002, "lr": 3.667826592668052e-07, "epoch": 3.732347894945229, "percentage": 74.65, "elapsed_time": "0:41:18", "remaining_time": "0:14:01", "throughput": 5618.08, "total_tokens": 13925376} +{"current_steps": 28285, "total_steps": 37885, "loss": 0.0006, "lr": 3.664261580928589e-07, "epoch": 3.733007786722977, "percentage": 74.66, "elapsed_time": "0:41:19", "remaining_time": "0:14:01", "throughput": 5618.35, "total_tokens": 13927936} +{"current_steps": 28290, "total_steps": 37885, "loss": 0.0044, "lr": 3.660697913903733e-07, "epoch": 3.733667678500726, "percentage": 74.67, "elapsed_time": "0:41:19", "remaining_time": "0:14:00", "throughput": 5618.5, "total_tokens": 13930176} +{"current_steps": 28295, "total_steps": 37885, "loss": 0.0001, "lr": 3.6571355923498346e-07, "epoch": 3.734327570278474, "percentage": 74.69, "elapsed_time": "0:41:19", "remaining_time": "0:14:00", "throughput": 5618.79, "total_tokens": 13932800} +{"current_steps": 28300, "total_steps": 37885, "loss": 0.0049, "lr": 3.6535746170229777e-07, "epoch": 3.734987462056223, "percentage": 74.7, "elapsed_time": "0:41:20", "remaining_time": "0:13:59", "throughput": 5619.07, "total_tokens": 13935424} +{"current_steps": 28305, "total_steps": 37885, "loss": 0.0039, "lr": 3.6500149886789524e-07, "epoch": 3.735647353833971, "percentage": 74.71, "elapsed_time": "0:41:20", "remaining_time": "0:13:59", "throughput": 5619.4, "total_tokens": 13938176} +{"current_steps": 28310, "total_steps": 37885, "loss": 0.0075, "lr": 3.64645670807326e-07, "epoch": 3.7363072456117195, "percentage": 74.73, "elapsed_time": "0:41:20", "remaining_time": "0:13:59", "throughput": 5619.64, "total_tokens": 13940672} +{"current_steps": 28315, "total_steps": 37885, "loss": 0.0013, "lr": 3.642899775961127e-07, "epoch": 3.736967137389468, "percentage": 74.74, "elapsed_time": "0:41:21", "remaining_time": "0:13:58", "throughput": 5619.91, "total_tokens": 13943232} +{"current_steps": 28320, "total_steps": 37885, "loss": 0.0224, "lr": 3.6393441930974734e-07, "epoch": 3.7376270291672165, "percentage": 74.75, "elapsed_time": "0:41:21", "remaining_time": "0:13:58", "throughput": 5620.04, "total_tokens": 13945472} +{"current_steps": 28325, "total_steps": 37885, "loss": 0.1003, "lr": 3.6357899602369626e-07, "epoch": 3.738286920944965, "percentage": 74.77, "elapsed_time": "0:41:21", "remaining_time": "0:13:57", "throughput": 5620.41, "total_tokens": 13948288} +{"current_steps": 28330, "total_steps": 37885, "loss": 0.0, "lr": 3.632237078133946e-07, "epoch": 3.7389468127227135, "percentage": 74.78, "elapsed_time": "0:41:22", "remaining_time": "0:13:57", "throughput": 5620.52, "total_tokens": 13950464} +{"current_steps": 28335, "total_steps": 37885, "loss": 0.0087, "lr": 3.628685547542496e-07, "epoch": 3.7396067045004617, "percentage": 74.79, "elapsed_time": "0:41:22", "remaining_time": "0:13:56", "throughput": 5620.65, "total_tokens": 13952640} +{"current_steps": 28340, "total_steps": 37885, "loss": 0.0, "lr": 3.6251353692164e-07, "epoch": 3.7402665962782105, "percentage": 74.81, "elapsed_time": "0:41:22", "remaining_time": "0:13:56", "throughput": 5620.81, "total_tokens": 13954944} +{"current_steps": 28345, "total_steps": 37885, "loss": 0.0097, "lr": 3.6215865439091587e-07, "epoch": 3.7409264880559587, "percentage": 74.82, "elapsed_time": "0:41:23", "remaining_time": "0:13:55", "throughput": 5620.95, "total_tokens": 13957184} +{"current_steps": 28350, "total_steps": 37885, "loss": 0.0, "lr": 3.6180390723739883e-07, "epoch": 3.7415863798337075, "percentage": 74.83, "elapsed_time": "0:41:23", "remaining_time": "0:13:55", "throughput": 5621.14, "total_tokens": 13959552} +{"current_steps": 28355, "total_steps": 37885, "loss": 0.0004, "lr": 3.614492955363806e-07, "epoch": 3.7422462716114557, "percentage": 74.84, "elapsed_time": "0:41:23", "remaining_time": "0:13:54", "throughput": 5621.46, "total_tokens": 13962240} +{"current_steps": 28360, "total_steps": 37885, "loss": 0.0844, "lr": 3.610948193631255e-07, "epoch": 3.742906163389204, "percentage": 74.86, "elapsed_time": "0:41:24", "remaining_time": "0:13:54", "throughput": 5621.63, "total_tokens": 13964544} +{"current_steps": 28365, "total_steps": 37885, "loss": 0.0, "lr": 3.607404787928686e-07, "epoch": 3.7435660551669527, "percentage": 74.87, "elapsed_time": "0:41:24", "remaining_time": "0:13:53", "throughput": 5621.87, "total_tokens": 13967040} +{"current_steps": 28370, "total_steps": 37885, "loss": 0.0, "lr": 3.6038627390081567e-07, "epoch": 3.744225946944701, "percentage": 74.88, "elapsed_time": "0:41:24", "remaining_time": "0:13:53", "throughput": 5622.19, "total_tokens": 13969728} +{"current_steps": 28375, "total_steps": 37885, "loss": 0.0, "lr": 3.6003220476214445e-07, "epoch": 3.7448858387224497, "percentage": 74.9, "elapsed_time": "0:41:25", "remaining_time": "0:13:52", "throughput": 5622.5, "total_tokens": 13972416} +{"current_steps": 28380, "total_steps": 37885, "loss": 0.0004, "lr": 3.596782714520037e-07, "epoch": 3.745545730500198, "percentage": 74.91, "elapsed_time": "0:41:25", "remaining_time": "0:13:52", "throughput": 5622.79, "total_tokens": 13975040} +{"current_steps": 28385, "total_steps": 37885, "loss": 0.0004, "lr": 3.593244740455127e-07, "epoch": 3.7462056222779463, "percentage": 74.92, "elapsed_time": "0:41:25", "remaining_time": "0:13:51", "throughput": 5623.02, "total_tokens": 13977472} +{"current_steps": 28390, "total_steps": 37885, "loss": 0.0176, "lr": 3.5897081261776275e-07, "epoch": 3.746865514055695, "percentage": 74.94, "elapsed_time": "0:41:26", "remaining_time": "0:13:51", "throughput": 5623.19, "total_tokens": 13979776} +{"current_steps": 28395, "total_steps": 37885, "loss": 0.0001, "lr": 3.586172872438158e-07, "epoch": 3.7475254058334433, "percentage": 74.95, "elapsed_time": "0:41:26", "remaining_time": "0:13:50", "throughput": 5623.46, "total_tokens": 13982336} +{"current_steps": 28400, "total_steps": 37885, "loss": 0.0352, "lr": 3.582638979987054e-07, "epoch": 3.748185297611192, "percentage": 74.96, "elapsed_time": "0:41:26", "remaining_time": "0:13:50", "throughput": 5623.68, "total_tokens": 13984768} +{"current_steps": 28405, "total_steps": 37885, "loss": 0.0521, "lr": 3.579106449574353e-07, "epoch": 3.7488451893889403, "percentage": 74.98, "elapsed_time": "0:41:27", "remaining_time": "0:13:50", "throughput": 5623.77, "total_tokens": 13986880} +{"current_steps": 28410, "total_steps": 37885, "loss": 0.0, "lr": 3.5755752819498107e-07, "epoch": 3.7495050811666886, "percentage": 74.99, "elapsed_time": "0:41:27", "remaining_time": "0:13:49", "throughput": 5624.13, "total_tokens": 13989696} +{"current_steps": 28415, "total_steps": 37885, "loss": 0.0109, "lr": 3.572045477862896e-07, "epoch": 3.750164972944437, "percentage": 75.0, "elapsed_time": "0:41:27", "remaining_time": "0:13:49", "throughput": 5624.27, "total_tokens": 13991936} +{"current_steps": 28420, "total_steps": 37885, "loss": 0.0003, "lr": 3.568517038062778e-07, "epoch": 3.7508248647221856, "percentage": 75.02, "elapsed_time": "0:41:28", "remaining_time": "0:13:48", "throughput": 5624.54, "total_tokens": 13994496} +{"current_steps": 28425, "total_steps": 37885, "loss": 0.0243, "lr": 3.564989963298346e-07, "epoch": 3.751484756499934, "percentage": 75.03, "elapsed_time": "0:41:28", "remaining_time": "0:13:48", "throughput": 5624.65, "total_tokens": 13996672} +{"current_steps": 28425, "total_steps": 37885, "eval_loss": 0.20364880561828613, "epoch": 3.751484756499934, "percentage": 75.03, "elapsed_time": "0:41:36", "remaining_time": "0:13:50", "throughput": 5606.82, "total_tokens": 13996672} +{"current_steps": 28430, "total_steps": 37885, "loss": 0.1141, "lr": 3.5614642543181996e-07, "epoch": 3.7521446482776826, "percentage": 75.04, "elapsed_time": "0:42:12", "remaining_time": "0:14:02", "throughput": 5526.95, "total_tokens": 13998976} +{"current_steps": 28435, "total_steps": 37885, "loss": 0.0002, "lr": 3.5579399118706364e-07, "epoch": 3.752804540055431, "percentage": 75.06, "elapsed_time": "0:42:13", "remaining_time": "0:14:01", "throughput": 5527.1, "total_tokens": 14001152} +{"current_steps": 28440, "total_steps": 37885, "loss": 0.0294, "lr": 3.5544169367036783e-07, "epoch": 3.753464431833179, "percentage": 75.07, "elapsed_time": "0:42:13", "remaining_time": "0:14:01", "throughput": 5527.31, "total_tokens": 14003520} +{"current_steps": 28445, "total_steps": 37885, "loss": 0.0, "lr": 3.550895329565049e-07, "epoch": 3.754124323610928, "percentage": 75.08, "elapsed_time": "0:42:13", "remaining_time": "0:14:00", "throughput": 5527.51, "total_tokens": 14005824} +{"current_steps": 28450, "total_steps": 37885, "loss": 0.0, "lr": 3.5473750912021894e-07, "epoch": 3.754784215388676, "percentage": 75.1, "elapsed_time": "0:42:14", "remaining_time": "0:14:00", "throughput": 5527.7, "total_tokens": 14008128} +{"current_steps": 28455, "total_steps": 37885, "loss": 0.0096, "lr": 3.543856222362239e-07, "epoch": 3.755444107166425, "percentage": 75.11, "elapsed_time": "0:42:14", "remaining_time": "0:13:59", "throughput": 5527.95, "total_tokens": 14010560} +{"current_steps": 28460, "total_steps": 37885, "loss": 0.0, "lr": 3.540338723792049e-07, "epoch": 3.756103998944173, "percentage": 75.12, "elapsed_time": "0:42:14", "remaining_time": "0:13:59", "throughput": 5528.25, "total_tokens": 14013184} +{"current_steps": 28465, "total_steps": 37885, "loss": 0.0, "lr": 3.5368225962381924e-07, "epoch": 3.7567638907219214, "percentage": 75.14, "elapsed_time": "0:42:15", "remaining_time": "0:13:58", "throughput": 5528.47, "total_tokens": 14015552} +{"current_steps": 28470, "total_steps": 37885, "loss": 0.0002, "lr": 3.533307840446935e-07, "epoch": 3.75742378249967, "percentage": 75.15, "elapsed_time": "0:42:15", "remaining_time": "0:13:58", "throughput": 5528.75, "total_tokens": 14018112} +{"current_steps": 28475, "total_steps": 37885, "loss": 0.0, "lr": 3.529794457164265e-07, "epoch": 3.7580836742774184, "percentage": 75.16, "elapsed_time": "0:42:15", "remaining_time": "0:13:58", "throughput": 5529.07, "total_tokens": 14020736} +{"current_steps": 28480, "total_steps": 37885, "loss": 0.0088, "lr": 3.526282447135862e-07, "epoch": 3.758743566055167, "percentage": 75.17, "elapsed_time": "0:42:16", "remaining_time": "0:13:57", "throughput": 5529.29, "total_tokens": 14023104} +{"current_steps": 28485, "total_steps": 37885, "loss": 0.0457, "lr": 3.5227718111071316e-07, "epoch": 3.7594034578329154, "percentage": 75.19, "elapsed_time": "0:42:16", "remaining_time": "0:13:57", "throughput": 5529.58, "total_tokens": 14025664} +{"current_steps": 28490, "total_steps": 37885, "loss": 0.0175, "lr": 3.519262549823183e-07, "epoch": 3.7600633496106637, "percentage": 75.2, "elapsed_time": "0:42:16", "remaining_time": "0:13:56", "throughput": 5529.7, "total_tokens": 14027776} +{"current_steps": 28495, "total_steps": 37885, "loss": 0.0004, "lr": 3.5157546640288227e-07, "epoch": 3.7607232413884124, "percentage": 75.21, "elapsed_time": "0:42:17", "remaining_time": "0:13:56", "throughput": 5529.92, "total_tokens": 14030144} +{"current_steps": 28500, "total_steps": 37885, "loss": 0.0067, "lr": 3.5122481544685857e-07, "epoch": 3.7613831331661607, "percentage": 75.23, "elapsed_time": "0:42:17", "remaining_time": "0:13:55", "throughput": 5530.15, "total_tokens": 14032576} +{"current_steps": 28505, "total_steps": 37885, "loss": 0.0166, "lr": 3.5087430218866945e-07, "epoch": 3.7620430249439094, "percentage": 75.24, "elapsed_time": "0:42:17", "remaining_time": "0:13:55", "throughput": 5530.37, "total_tokens": 14034944} +{"current_steps": 28510, "total_steps": 37885, "loss": 0.0, "lr": 3.505239267027094e-07, "epoch": 3.7627029167216577, "percentage": 75.25, "elapsed_time": "0:42:18", "remaining_time": "0:13:54", "throughput": 5530.58, "total_tokens": 14037312} +{"current_steps": 28515, "total_steps": 37885, "loss": 0.0208, "lr": 3.5017368906334235e-07, "epoch": 3.763362808499406, "percentage": 75.27, "elapsed_time": "0:42:18", "remaining_time": "0:13:54", "throughput": 5530.87, "total_tokens": 14039872} +{"current_steps": 28520, "total_steps": 37885, "loss": 0.0324, "lr": 3.498235893449042e-07, "epoch": 3.7640227002771547, "percentage": 75.28, "elapsed_time": "0:42:18", "remaining_time": "0:13:53", "throughput": 5531.08, "total_tokens": 14042240} +{"current_steps": 28525, "total_steps": 37885, "loss": 0.0005, "lr": 3.494736276217013e-07, "epoch": 3.764682592054903, "percentage": 75.29, "elapsed_time": "0:42:19", "remaining_time": "0:13:53", "throughput": 5531.32, "total_tokens": 14044672} +{"current_steps": 28530, "total_steps": 37885, "loss": 0.0003, "lr": 3.4912380396800987e-07, "epoch": 3.7653424838326517, "percentage": 75.31, "elapsed_time": "0:42:19", "remaining_time": "0:13:52", "throughput": 5531.53, "total_tokens": 14047040} +{"current_steps": 28535, "total_steps": 37885, "loss": 0.0735, "lr": 3.4877411845807783e-07, "epoch": 3.7660023756104, "percentage": 75.32, "elapsed_time": "0:42:19", "remaining_time": "0:13:52", "throughput": 5531.91, "total_tokens": 14049856} +{"current_steps": 28540, "total_steps": 37885, "loss": 0.0352, "lr": 3.4842457116612365e-07, "epoch": 3.7666622673881482, "percentage": 75.33, "elapsed_time": "0:42:20", "remaining_time": "0:13:51", "throughput": 5532.15, "total_tokens": 14052288} +{"current_steps": 28545, "total_steps": 37885, "loss": 0.0001, "lr": 3.4807516216633557e-07, "epoch": 3.7673221591658965, "percentage": 75.35, "elapsed_time": "0:42:20", "remaining_time": "0:13:51", "throughput": 5532.31, "total_tokens": 14054528} +{"current_steps": 28550, "total_steps": 37885, "loss": 0.0, "lr": 3.477258915328735e-07, "epoch": 3.7679820509436452, "percentage": 75.36, "elapsed_time": "0:42:20", "remaining_time": "0:13:50", "throughput": 5532.52, "total_tokens": 14056896} +{"current_steps": 28555, "total_steps": 37885, "loss": 0.0, "lr": 3.4737675933986744e-07, "epoch": 3.768641942721394, "percentage": 75.37, "elapsed_time": "0:42:21", "remaining_time": "0:13:50", "throughput": 5532.78, "total_tokens": 14059392} +{"current_steps": 28560, "total_steps": 37885, "loss": 0.0, "lr": 3.4702776566141864e-07, "epoch": 3.7693018344991422, "percentage": 75.39, "elapsed_time": "0:42:21", "remaining_time": "0:13:49", "throughput": 5532.97, "total_tokens": 14061696} +{"current_steps": 28565, "total_steps": 37885, "loss": 0.0002, "lr": 3.4667891057159784e-07, "epoch": 3.7699617262768905, "percentage": 75.4, "elapsed_time": "0:42:21", "remaining_time": "0:13:49", "throughput": 5533.06, "total_tokens": 14063744} +{"current_steps": 28570, "total_steps": 37885, "loss": 0.0906, "lr": 3.463301941444473e-07, "epoch": 3.770621618054639, "percentage": 75.41, "elapsed_time": "0:42:22", "remaining_time": "0:13:48", "throughput": 5533.32, "total_tokens": 14066240} +{"current_steps": 28575, "total_steps": 37885, "loss": 0.0, "lr": 3.459816164539798e-07, "epoch": 3.7712815098323875, "percentage": 75.43, "elapsed_time": "0:42:22", "remaining_time": "0:13:48", "throughput": 5533.58, "total_tokens": 14068736} +{"current_steps": 28580, "total_steps": 37885, "loss": 0.1, "lr": 3.456331775741779e-07, "epoch": 3.771941401610136, "percentage": 75.44, "elapsed_time": "0:42:22", "remaining_time": "0:13:47", "throughput": 5533.84, "total_tokens": 14071232} +{"current_steps": 28585, "total_steps": 37885, "loss": 0.0, "lr": 3.452848775789955e-07, "epoch": 3.7726012933878845, "percentage": 75.45, "elapsed_time": "0:42:23", "remaining_time": "0:13:47", "throughput": 5534.08, "total_tokens": 14073664} +{"current_steps": 28590, "total_steps": 37885, "loss": 0.0002, "lr": 3.449367165423571e-07, "epoch": 3.773261185165633, "percentage": 75.47, "elapsed_time": "0:42:23", "remaining_time": "0:13:46", "throughput": 5534.24, "total_tokens": 14075904} +{"current_steps": 28595, "total_steps": 37885, "loss": 0.002, "lr": 3.4458869453815674e-07, "epoch": 3.773921076943381, "percentage": 75.48, "elapsed_time": "0:42:23", "remaining_time": "0:13:46", "throughput": 5534.43, "total_tokens": 14078208} +{"current_steps": 28600, "total_steps": 37885, "loss": 0.0446, "lr": 3.4424081164025976e-07, "epoch": 3.77458096872113, "percentage": 75.49, "elapsed_time": "0:42:24", "remaining_time": "0:13:45", "throughput": 5534.69, "total_tokens": 14080704} +{"current_steps": 28605, "total_steps": 37885, "loss": 0.0, "lr": 3.4389306792250194e-07, "epoch": 3.775240860498878, "percentage": 75.5, "elapsed_time": "0:42:24", "remaining_time": "0:13:45", "throughput": 5534.91, "total_tokens": 14083072} +{"current_steps": 28610, "total_steps": 37885, "loss": 0.0, "lr": 3.435454634586896e-07, "epoch": 3.775900752276627, "percentage": 75.52, "elapsed_time": "0:42:24", "remaining_time": "0:13:44", "throughput": 5535.04, "total_tokens": 14085248} +{"current_steps": 28615, "total_steps": 37885, "loss": 0.0001, "lr": 3.431979983225987e-07, "epoch": 3.776560644054375, "percentage": 75.53, "elapsed_time": "0:42:25", "remaining_time": "0:13:44", "throughput": 5535.37, "total_tokens": 14087936} +{"current_steps": 28620, "total_steps": 37885, "loss": 0.0266, "lr": 3.4285067258797626e-07, "epoch": 3.7772205358321234, "percentage": 75.54, "elapsed_time": "0:42:25", "remaining_time": "0:13:44", "throughput": 5535.61, "total_tokens": 14090368} +{"current_steps": 28625, "total_steps": 37885, "loss": 0.0001, "lr": 3.425034863285404e-07, "epoch": 3.777880427609872, "percentage": 75.56, "elapsed_time": "0:42:25", "remaining_time": "0:13:43", "throughput": 5536.12, "total_tokens": 14093568} +{"current_steps": 28630, "total_steps": 37885, "loss": 0.0341, "lr": 3.42156439617978e-07, "epoch": 3.7785403193876204, "percentage": 75.57, "elapsed_time": "0:42:26", "remaining_time": "0:13:43", "throughput": 5536.29, "total_tokens": 14095808} +{"current_steps": 28635, "total_steps": 37885, "loss": 0.0, "lr": 3.418095325299475e-07, "epoch": 3.779200211165369, "percentage": 75.58, "elapsed_time": "0:42:26", "remaining_time": "0:13:42", "throughput": 5536.58, "total_tokens": 14098368} +{"current_steps": 28640, "total_steps": 37885, "loss": 0.0001, "lr": 3.414627651380778e-07, "epoch": 3.7798601029431174, "percentage": 75.6, "elapsed_time": "0:42:26", "remaining_time": "0:13:42", "throughput": 5536.8, "total_tokens": 14100736} +{"current_steps": 28645, "total_steps": 37885, "loss": 0.0, "lr": 3.4111613751596725e-07, "epoch": 3.7805199947208656, "percentage": 75.61, "elapsed_time": "0:42:27", "remaining_time": "0:13:41", "throughput": 5537.01, "total_tokens": 14103104} +{"current_steps": 28650, "total_steps": 37885, "loss": 0.0004, "lr": 3.407696497371855e-07, "epoch": 3.7811798864986144, "percentage": 75.62, "elapsed_time": "0:42:27", "remaining_time": "0:13:41", "throughput": 5537.28, "total_tokens": 14105600} +{"current_steps": 28655, "total_steps": 37885, "loss": 0.0, "lr": 3.40423301875271e-07, "epoch": 3.7818397782763626, "percentage": 75.64, "elapsed_time": "0:42:27", "remaining_time": "0:13:40", "throughput": 5537.4, "total_tokens": 14107712} +{"current_steps": 28660, "total_steps": 37885, "loss": 0.0008, "lr": 3.400770940037353e-07, "epoch": 3.7824996700541114, "percentage": 75.65, "elapsed_time": "0:42:28", "remaining_time": "0:13:40", "throughput": 5537.62, "total_tokens": 14110080} +{"current_steps": 28665, "total_steps": 37885, "loss": 0.0054, "lr": 3.3973102619605753e-07, "epoch": 3.7831595618318596, "percentage": 75.66, "elapsed_time": "0:42:28", "remaining_time": "0:13:39", "throughput": 5537.87, "total_tokens": 14112512} +{"current_steps": 28670, "total_steps": 37885, "loss": 0.0, "lr": 3.3938509852568773e-07, "epoch": 3.783819453609608, "percentage": 75.68, "elapsed_time": "0:42:28", "remaining_time": "0:13:39", "throughput": 5537.99, "total_tokens": 14114624} +{"current_steps": 28675, "total_steps": 37885, "loss": 0.0, "lr": 3.390393110660471e-07, "epoch": 3.784479345387356, "percentage": 75.69, "elapsed_time": "0:42:29", "remaining_time": "0:13:38", "throughput": 5538.19, "total_tokens": 14116928} +{"current_steps": 28680, "total_steps": 37885, "loss": 0.0, "lr": 3.386936638905263e-07, "epoch": 3.785139237165105, "percentage": 75.7, "elapsed_time": "0:42:29", "remaining_time": "0:13:38", "throughput": 5538.41, "total_tokens": 14119296} +{"current_steps": 28685, "total_steps": 37885, "loss": 0.0, "lr": 3.38348157072487e-07, "epoch": 3.7857991289428536, "percentage": 75.72, "elapsed_time": "0:42:29", "remaining_time": "0:13:37", "throughput": 5538.6, "total_tokens": 14121600} +{"current_steps": 28690, "total_steps": 37885, "loss": 0.0426, "lr": 3.380027906852596e-07, "epoch": 3.786459020720602, "percentage": 75.73, "elapsed_time": "0:42:29", "remaining_time": "0:13:37", "throughput": 5538.76, "total_tokens": 14123840} +{"current_steps": 28695, "total_steps": 37885, "loss": 0.0, "lr": 3.3765756480214616e-07, "epoch": 3.78711891249835, "percentage": 75.74, "elapsed_time": "0:42:30", "remaining_time": "0:13:36", "throughput": 5538.97, "total_tokens": 14126208} +{"current_steps": 28700, "total_steps": 37885, "loss": 0.0, "lr": 3.373124794964185e-07, "epoch": 3.7877788042760985, "percentage": 75.76, "elapsed_time": "0:42:30", "remaining_time": "0:13:36", "throughput": 5539.21, "total_tokens": 14128640} +{"current_steps": 28705, "total_steps": 37885, "loss": 0.0004, "lr": 3.36967534841318e-07, "epoch": 3.788438696053847, "percentage": 75.77, "elapsed_time": "0:42:30", "remaining_time": "0:13:35", "throughput": 5539.39, "total_tokens": 14130944} +{"current_steps": 28710, "total_steps": 37885, "loss": 0.0002, "lr": 3.3662273091005687e-07, "epoch": 3.7890985878315955, "percentage": 75.78, "elapsed_time": "0:42:31", "remaining_time": "0:13:35", "throughput": 5539.68, "total_tokens": 14133504} +{"current_steps": 28715, "total_steps": 37885, "loss": 0.0011, "lr": 3.3627806777581777e-07, "epoch": 3.789758479609344, "percentage": 75.8, "elapsed_time": "0:42:31", "remaining_time": "0:13:34", "throughput": 5539.98, "total_tokens": 14136128} +{"current_steps": 28720, "total_steps": 37885, "loss": 0.0919, "lr": 3.35933545511752e-07, "epoch": 3.7904183713870925, "percentage": 75.81, "elapsed_time": "0:42:31", "remaining_time": "0:13:34", "throughput": 5540.16, "total_tokens": 14138432} +{"current_steps": 28725, "total_steps": 37885, "loss": 0.0203, "lr": 3.3558916419098247e-07, "epoch": 3.7910782631648408, "percentage": 75.82, "elapsed_time": "0:42:32", "remaining_time": "0:13:33", "throughput": 5540.43, "total_tokens": 14140928} +{"current_steps": 28730, "total_steps": 37885, "loss": 0.0014, "lr": 3.3524492388660166e-07, "epoch": 3.7917381549425895, "percentage": 75.83, "elapsed_time": "0:42:32", "remaining_time": "0:13:33", "throughput": 5540.65, "total_tokens": 14143296} +{"current_steps": 28735, "total_steps": 37885, "loss": 0.0003, "lr": 3.349008246716721e-07, "epoch": 3.7923980467203378, "percentage": 75.85, "elapsed_time": "0:42:32", "remaining_time": "0:13:32", "throughput": 5540.95, "total_tokens": 14145920} +{"current_steps": 28740, "total_steps": 37885, "loss": 0.0854, "lr": 3.345568666192261e-07, "epoch": 3.7930579384980865, "percentage": 75.86, "elapsed_time": "0:42:33", "remaining_time": "0:13:32", "throughput": 5541.23, "total_tokens": 14148480} +{"current_steps": 28745, "total_steps": 37885, "loss": 0.0, "lr": 3.3421304980226627e-07, "epoch": 3.7937178302758348, "percentage": 75.87, "elapsed_time": "0:42:33", "remaining_time": "0:13:31", "throughput": 5541.48, "total_tokens": 14150976} +{"current_steps": 28750, "total_steps": 37885, "loss": 0.0001, "lr": 3.338693742937657e-07, "epoch": 3.794377722053583, "percentage": 75.89, "elapsed_time": "0:42:33", "remaining_time": "0:13:31", "throughput": 5541.81, "total_tokens": 14153728} +{"current_steps": 28755, "total_steps": 37885, "loss": 0.0002, "lr": 3.3352584016666654e-07, "epoch": 3.7950376138313318, "percentage": 75.9, "elapsed_time": "0:42:34", "remaining_time": "0:13:31", "throughput": 5542.1, "total_tokens": 14156288} +{"current_steps": 28760, "total_steps": 37885, "loss": 0.0006, "lr": 3.3318244749388136e-07, "epoch": 3.79569750560908, "percentage": 75.91, "elapsed_time": "0:42:34", "remaining_time": "0:13:30", "throughput": 5542.41, "total_tokens": 14158976} +{"current_steps": 28765, "total_steps": 37885, "loss": 0.0048, "lr": 3.328391963482934e-07, "epoch": 3.7963573973868288, "percentage": 75.93, "elapsed_time": "0:42:34", "remaining_time": "0:13:30", "throughput": 5542.67, "total_tokens": 14161472} +{"current_steps": 28770, "total_steps": 37885, "loss": 0.0, "lr": 3.3249608680275455e-07, "epoch": 3.797017289164577, "percentage": 75.94, "elapsed_time": "0:42:35", "remaining_time": "0:13:29", "throughput": 5542.92, "total_tokens": 14163968} +{"current_steps": 28775, "total_steps": 37885, "loss": 0.0007, "lr": 3.3215311893008744e-07, "epoch": 3.7976771809423253, "percentage": 75.95, "elapsed_time": "0:42:35", "remaining_time": "0:13:29", "throughput": 5543.21, "total_tokens": 14166592} +{"current_steps": 28780, "total_steps": 37885, "loss": 0.0502, "lr": 3.318102928030848e-07, "epoch": 3.798337072720074, "percentage": 75.97, "elapsed_time": "0:42:36", "remaining_time": "0:13:28", "throughput": 5543.56, "total_tokens": 14169344} +{"current_steps": 28785, "total_steps": 37885, "loss": 0.0001, "lr": 3.3146760849450916e-07, "epoch": 3.7989969644978223, "percentage": 75.98, "elapsed_time": "0:42:36", "remaining_time": "0:13:28", "throughput": 5543.84, "total_tokens": 14171904} +{"current_steps": 28790, "total_steps": 37885, "loss": 0.0, "lr": 3.3112506607709246e-07, "epoch": 3.799656856275571, "percentage": 75.99, "elapsed_time": "0:42:36", "remaining_time": "0:13:27", "throughput": 5544.07, "total_tokens": 14174336} +{"current_steps": 28795, "total_steps": 37885, "loss": 0.0001, "lr": 3.307826656235363e-07, "epoch": 3.8003167480533193, "percentage": 76.01, "elapsed_time": "0:42:37", "remaining_time": "0:13:27", "throughput": 5544.25, "total_tokens": 14176640} +{"current_steps": 28800, "total_steps": 37885, "loss": 0.028, "lr": 3.304404072065139e-07, "epoch": 3.8009766398310676, "percentage": 76.02, "elapsed_time": "0:42:37", "remaining_time": "0:13:26", "throughput": 5544.42, "total_tokens": 14178944} +{"current_steps": 28805, "total_steps": 37885, "loss": 0.0, "lr": 3.30098290898666e-07, "epoch": 3.8016365316088163, "percentage": 76.03, "elapsed_time": "0:42:37", "remaining_time": "0:13:26", "throughput": 5544.71, "total_tokens": 14181568} +{"current_steps": 28810, "total_steps": 37885, "loss": 0.0001, "lr": 3.2975631677260505e-07, "epoch": 3.8022964233865646, "percentage": 76.05, "elapsed_time": "0:42:38", "remaining_time": "0:13:25", "throughput": 5544.99, "total_tokens": 14184128} +{"current_steps": 28815, "total_steps": 37885, "loss": 0.0001, "lr": 3.294144849009122e-07, "epoch": 3.8029563151643133, "percentage": 76.06, "elapsed_time": "0:42:38", "remaining_time": "0:13:25", "throughput": 5545.21, "total_tokens": 14186560} +{"current_steps": 28820, "total_steps": 37885, "loss": 0.0382, "lr": 3.290727953561393e-07, "epoch": 3.8036162069420616, "percentage": 76.07, "elapsed_time": "0:42:38", "remaining_time": "0:13:24", "throughput": 5545.5, "total_tokens": 14189184} +{"current_steps": 28825, "total_steps": 37885, "loss": 0.0, "lr": 3.287312482108071e-07, "epoch": 3.80427609871981, "percentage": 76.09, "elapsed_time": "0:42:39", "remaining_time": "0:13:24", "throughput": 5545.71, "total_tokens": 14191616} +{"current_steps": 28830, "total_steps": 37885, "loss": 0.0738, "lr": 3.2838984353740593e-07, "epoch": 3.804935990497558, "percentage": 76.1, "elapsed_time": "0:42:39", "remaining_time": "0:13:23", "throughput": 5546.07, "total_tokens": 14194432} +{"current_steps": 28835, "total_steps": 37885, "loss": 0.0114, "lr": 3.2804858140839764e-07, "epoch": 3.805595882275307, "percentage": 76.11, "elapsed_time": "0:42:39", "remaining_time": "0:13:23", "throughput": 5546.39, "total_tokens": 14197120} +{"current_steps": 28840, "total_steps": 37885, "loss": 0.0564, "lr": 3.277074618962117e-07, "epoch": 3.806255774053055, "percentage": 76.13, "elapsed_time": "0:42:40", "remaining_time": "0:13:22", "throughput": 5546.56, "total_tokens": 14199424} +{"current_steps": 28845, "total_steps": 37885, "loss": 0.086, "lr": 3.2736648507324903e-07, "epoch": 3.806915665830804, "percentage": 76.14, "elapsed_time": "0:42:40", "remaining_time": "0:13:22", "throughput": 5546.75, "total_tokens": 14201792} +{"current_steps": 28850, "total_steps": 37885, "loss": 0.0, "lr": 3.270256510118786e-07, "epoch": 3.807575557608552, "percentage": 76.15, "elapsed_time": "0:42:40", "remaining_time": "0:13:21", "throughput": 5547.04, "total_tokens": 14204416} +{"current_steps": 28855, "total_steps": 37885, "loss": 0.0016, "lr": 3.2668495978444065e-07, "epoch": 3.8082354493863004, "percentage": 76.16, "elapsed_time": "0:42:41", "remaining_time": "0:13:21", "throughput": 5547.35, "total_tokens": 14207104} +{"current_steps": 28860, "total_steps": 37885, "loss": 0.063, "lr": 3.2634441146324445e-07, "epoch": 3.808895341164049, "percentage": 76.18, "elapsed_time": "0:42:41", "remaining_time": "0:13:20", "throughput": 5547.58, "total_tokens": 14209600} +{"current_steps": 28865, "total_steps": 37885, "loss": 0.0611, "lr": 3.26004006120568e-07, "epoch": 3.8095552329417974, "percentage": 76.19, "elapsed_time": "0:42:41", "remaining_time": "0:13:20", "throughput": 5547.73, "total_tokens": 14211840} +{"current_steps": 28870, "total_steps": 37885, "loss": 0.094, "lr": 3.256637438286612e-07, "epoch": 3.810215124719546, "percentage": 76.2, "elapsed_time": "0:42:42", "remaining_time": "0:13:20", "throughput": 5547.97, "total_tokens": 14214336} +{"current_steps": 28875, "total_steps": 37885, "loss": 0.0, "lr": 3.253236246597417e-07, "epoch": 3.8108750164972944, "percentage": 76.22, "elapsed_time": "0:42:42", "remaining_time": "0:13:19", "throughput": 5548.13, "total_tokens": 14216640} +{"current_steps": 28880, "total_steps": 37885, "loss": 0.0006, "lr": 3.2498364868599683e-07, "epoch": 3.8115349082750427, "percentage": 76.23, "elapsed_time": "0:42:42", "remaining_time": "0:13:19", "throughput": 5548.43, "total_tokens": 14219264} +{"current_steps": 28885, "total_steps": 37885, "loss": 0.0352, "lr": 3.2464381597958444e-07, "epoch": 3.8121948000527914, "percentage": 76.24, "elapsed_time": "0:42:43", "remaining_time": "0:13:18", "throughput": 5548.58, "total_tokens": 14221504} +{"current_steps": 28890, "total_steps": 37885, "loss": 0.0, "lr": 3.243041266126316e-07, "epoch": 3.8128546918305397, "percentage": 76.26, "elapsed_time": "0:42:43", "remaining_time": "0:13:18", "throughput": 5548.74, "total_tokens": 14223744} +{"current_steps": 28895, "total_steps": 37885, "loss": 0.0, "lr": 3.239645806572352e-07, "epoch": 3.8135145836082884, "percentage": 76.27, "elapsed_time": "0:42:43", "remaining_time": "0:13:17", "throughput": 5549.01, "total_tokens": 14226304} +{"current_steps": 28900, "total_steps": 37885, "loss": 0.0213, "lr": 3.2362517818546085e-07, "epoch": 3.8141744753860367, "percentage": 76.28, "elapsed_time": "0:42:44", "remaining_time": "0:13:17", "throughput": 5549.22, "total_tokens": 14228672} +{"current_steps": 28905, "total_steps": 37885, "loss": 0.0001, "lr": 3.2328591926934446e-07, "epoch": 3.814834367163785, "percentage": 76.3, "elapsed_time": "0:42:44", "remaining_time": "0:13:16", "throughput": 5549.53, "total_tokens": 14231360} +{"current_steps": 28910, "total_steps": 37885, "loss": 0.0001, "lr": 3.229468039808916e-07, "epoch": 3.8154942589415337, "percentage": 76.31, "elapsed_time": "0:42:44", "remaining_time": "0:13:16", "throughput": 5549.77, "total_tokens": 14233856} +{"current_steps": 28915, "total_steps": 37885, "loss": 0.0, "lr": 3.2260783239207644e-07, "epoch": 3.816154150719282, "percentage": 76.32, "elapsed_time": "0:42:45", "remaining_time": "0:13:15", "throughput": 5550.04, "total_tokens": 14236416} +{"current_steps": 28920, "total_steps": 37885, "loss": 0.0, "lr": 3.2226900457484354e-07, "epoch": 3.8168140424970307, "percentage": 76.34, "elapsed_time": "0:42:45", "remaining_time": "0:13:15", "throughput": 5550.26, "total_tokens": 14238848} +{"current_steps": 28925, "total_steps": 37885, "loss": 0.0217, "lr": 3.21930320601107e-07, "epoch": 3.817473934274779, "percentage": 76.35, "elapsed_time": "0:42:45", "remaining_time": "0:13:14", "throughput": 5550.64, "total_tokens": 14241728} +{"current_steps": 28930, "total_steps": 37885, "loss": 0.0001, "lr": 3.215917805427495e-07, "epoch": 3.8181338260525273, "percentage": 76.36, "elapsed_time": "0:42:46", "remaining_time": "0:13:14", "throughput": 5550.77, "total_tokens": 14243904} +{"current_steps": 28935, "total_steps": 37885, "loss": 0.0, "lr": 3.2125338447162386e-07, "epoch": 3.818793717830276, "percentage": 76.38, "elapsed_time": "0:42:46", "remaining_time": "0:13:13", "throughput": 5551.01, "total_tokens": 14246336} +{"current_steps": 28940, "total_steps": 37885, "loss": 0.0003, "lr": 3.209151324595523e-07, "epoch": 3.8194536096080243, "percentage": 76.39, "elapsed_time": "0:42:46", "remaining_time": "0:13:13", "throughput": 5551.14, "total_tokens": 14248512} +{"current_steps": 28945, "total_steps": 37885, "loss": 0.0657, "lr": 3.205770245783267e-07, "epoch": 3.820113501385773, "percentage": 76.4, "elapsed_time": "0:42:47", "remaining_time": "0:13:12", "throughput": 5551.36, "total_tokens": 14250944} +{"current_steps": 28950, "total_steps": 37885, "loss": 0.1313, "lr": 3.202390608997072e-07, "epoch": 3.8207733931635213, "percentage": 76.42, "elapsed_time": "0:42:47", "remaining_time": "0:13:12", "throughput": 5551.66, "total_tokens": 14253568} +{"current_steps": 28955, "total_steps": 37885, "loss": 0.0, "lr": 3.1990124149542465e-07, "epoch": 3.8214332849412695, "percentage": 76.43, "elapsed_time": "0:42:47", "remaining_time": "0:13:11", "throughput": 5551.91, "total_tokens": 14256064} +{"current_steps": 28960, "total_steps": 37885, "loss": 0.1208, "lr": 3.1956356643717896e-07, "epoch": 3.822093176719018, "percentage": 76.44, "elapsed_time": "0:42:48", "remaining_time": "0:13:11", "throughput": 5552.06, "total_tokens": 14258304} +{"current_steps": 28965, "total_steps": 37885, "loss": 0.0001, "lr": 3.1922603579663877e-07, "epoch": 3.8227530684967665, "percentage": 76.46, "elapsed_time": "0:42:48", "remaining_time": "0:13:10", "throughput": 5552.24, "total_tokens": 14260608} +{"current_steps": 28970, "total_steps": 37885, "loss": 0.0001, "lr": 3.188886496454426e-07, "epoch": 3.823412960274515, "percentage": 76.47, "elapsed_time": "0:42:48", "remaining_time": "0:13:10", "throughput": 5552.47, "total_tokens": 14263040} +{"current_steps": 28975, "total_steps": 37885, "loss": 0.0844, "lr": 3.185514080551986e-07, "epoch": 3.8240728520522635, "percentage": 76.48, "elapsed_time": "0:42:49", "remaining_time": "0:13:10", "throughput": 5552.63, "total_tokens": 14265344} +{"current_steps": 28980, "total_steps": 37885, "loss": 0.0, "lr": 3.1821431109748344e-07, "epoch": 3.824732743830012, "percentage": 76.49, "elapsed_time": "0:42:49", "remaining_time": "0:13:09", "throughput": 5552.9, "total_tokens": 14267904} +{"current_steps": 28985, "total_steps": 37885, "loss": 0.1095, "lr": 3.178773588438438e-07, "epoch": 3.82539263560776, "percentage": 76.51, "elapsed_time": "0:42:49", "remaining_time": "0:13:09", "throughput": 5553.13, "total_tokens": 14270400} +{"current_steps": 28990, "total_steps": 37885, "loss": 0.0, "lr": 3.1754055136579463e-07, "epoch": 3.826052527385509, "percentage": 76.52, "elapsed_time": "0:42:50", "remaining_time": "0:13:08", "throughput": 5553.32, "total_tokens": 14272768} +{"current_steps": 28995, "total_steps": 37885, "loss": 0.0001, "lr": 3.172038887348221e-07, "epoch": 3.826712419163257, "percentage": 76.53, "elapsed_time": "0:42:50", "remaining_time": "0:13:08", "throughput": 5553.51, "total_tokens": 14275136} +{"current_steps": 29000, "total_steps": 37885, "loss": 0.0001, "lr": 3.168673710223797e-07, "epoch": 3.827372310941006, "percentage": 76.55, "elapsed_time": "0:42:50", "remaining_time": "0:13:07", "throughput": 5553.77, "total_tokens": 14277696} +{"current_steps": 29005, "total_steps": 37885, "loss": 0.0001, "lr": 3.165309982998903e-07, "epoch": 3.828032202718754, "percentage": 76.56, "elapsed_time": "0:42:51", "remaining_time": "0:13:07", "throughput": 5553.9, "total_tokens": 14279872} +{"current_steps": 29010, "total_steps": 37885, "loss": 0.0001, "lr": 3.161947706387479e-07, "epoch": 3.8286920944965024, "percentage": 76.57, "elapsed_time": "0:42:51", "remaining_time": "0:13:06", "throughput": 5554.16, "total_tokens": 14282432} +{"current_steps": 29015, "total_steps": 37885, "loss": 0.0004, "lr": 3.1585868811031337e-07, "epoch": 3.829351986274251, "percentage": 76.59, "elapsed_time": "0:42:51", "remaining_time": "0:13:06", "throughput": 5554.37, "total_tokens": 14284864} +{"current_steps": 29020, "total_steps": 37885, "loss": 0.0003, "lr": 3.155227507859185e-07, "epoch": 3.8300118780519994, "percentage": 76.6, "elapsed_time": "0:42:52", "remaining_time": "0:13:05", "throughput": 5554.59, "total_tokens": 14287296} +{"current_steps": 29025, "total_steps": 37885, "loss": 0.0674, "lr": 3.1518695873686285e-07, "epoch": 3.830671769829748, "percentage": 76.61, "elapsed_time": "0:42:52", "remaining_time": "0:13:05", "throughput": 5554.88, "total_tokens": 14289920} +{"current_steps": 29030, "total_steps": 37885, "loss": 0.0001, "lr": 3.1485131203441605e-07, "epoch": 3.8313316616074964, "percentage": 76.63, "elapsed_time": "0:42:52", "remaining_time": "0:13:04", "throughput": 5555.12, "total_tokens": 14292416} +{"current_steps": 29035, "total_steps": 37885, "loss": 0.0065, "lr": 3.1451581074981726e-07, "epoch": 3.8319915533852447, "percentage": 76.64, "elapsed_time": "0:42:53", "remaining_time": "0:13:04", "throughput": 5555.23, "total_tokens": 14294592} +{"current_steps": 29040, "total_steps": 37885, "loss": 0.0003, "lr": 3.141804549542735e-07, "epoch": 3.8326514451629934, "percentage": 76.65, "elapsed_time": "0:42:53", "remaining_time": "0:13:03", "throughput": 5555.48, "total_tokens": 14297088} +{"current_steps": 29045, "total_steps": 37885, "loss": 0.1579, "lr": 3.138452447189617e-07, "epoch": 3.8333113369407417, "percentage": 76.67, "elapsed_time": "0:42:53", "remaining_time": "0:13:03", "throughput": 5555.76, "total_tokens": 14299712} +{"current_steps": 29050, "total_steps": 37885, "loss": 0.0003, "lr": 3.1351018011502837e-07, "epoch": 3.8339712287184904, "percentage": 76.68, "elapsed_time": "0:42:54", "remaining_time": "0:13:02", "throughput": 5555.89, "total_tokens": 14301888} +{"current_steps": 29055, "total_steps": 37885, "loss": 0.0567, "lr": 3.1317526121358785e-07, "epoch": 3.8346311204962387, "percentage": 76.69, "elapsed_time": "0:42:54", "remaining_time": "0:13:02", "throughput": 5556.09, "total_tokens": 14304256} +{"current_steps": 29060, "total_steps": 37885, "loss": 0.0001, "lr": 3.128404880857244e-07, "epoch": 3.835291012273987, "percentage": 76.71, "elapsed_time": "0:42:54", "remaining_time": "0:13:01", "throughput": 5556.34, "total_tokens": 14306752} +{"current_steps": 29065, "total_steps": 37885, "loss": 0.0005, "lr": 3.125058608024914e-07, "epoch": 3.8359509040517357, "percentage": 76.72, "elapsed_time": "0:42:55", "remaining_time": "0:13:01", "throughput": 5556.59, "total_tokens": 14309248} +{"current_steps": 29070, "total_steps": 37885, "loss": 0.0164, "lr": 3.1217137943491144e-07, "epoch": 3.836610795829484, "percentage": 76.73, "elapsed_time": "0:42:55", "remaining_time": "0:13:00", "throughput": 5556.88, "total_tokens": 14311872} +{"current_steps": 29075, "total_steps": 37885, "loss": 0.0001, "lr": 3.1183704405397494e-07, "epoch": 3.8372706876072327, "percentage": 76.75, "elapsed_time": "0:42:55", "remaining_time": "0:13:00", "throughput": 5557.13, "total_tokens": 14314368} +{"current_steps": 29080, "total_steps": 37885, "loss": 0.0381, "lr": 3.1150285473064255e-07, "epoch": 3.837930579384981, "percentage": 76.76, "elapsed_time": "0:42:56", "remaining_time": "0:13:00", "throughput": 5557.37, "total_tokens": 14316864} +{"current_steps": 29085, "total_steps": 37885, "loss": 0.0239, "lr": 3.1116881153584387e-07, "epoch": 3.8385904711627292, "percentage": 76.77, "elapsed_time": "0:42:56", "remaining_time": "0:12:59", "throughput": 5557.61, "total_tokens": 14319360} +{"current_steps": 29090, "total_steps": 37885, "loss": 0.0, "lr": 3.108349145404764e-07, "epoch": 3.8392503629404775, "percentage": 76.79, "elapsed_time": "0:42:56", "remaining_time": "0:12:59", "throughput": 5557.93, "total_tokens": 14322048} +{"current_steps": 29095, "total_steps": 37885, "loss": 0.0382, "lr": 3.1050116381540793e-07, "epoch": 3.8399102547182262, "percentage": 76.8, "elapsed_time": "0:42:57", "remaining_time": "0:12:58", "throughput": 5558.17, "total_tokens": 14324480} +{"current_steps": 29100, "total_steps": 37885, "loss": 0.0021, "lr": 3.101675594314747e-07, "epoch": 3.8405701464959745, "percentage": 76.81, "elapsed_time": "0:42:57", "remaining_time": "0:12:58", "throughput": 5558.42, "total_tokens": 14326976} +{"current_steps": 29105, "total_steps": 37885, "loss": 0.0002, "lr": 3.098341014594813e-07, "epoch": 3.8412300382737232, "percentage": 76.82, "elapsed_time": "0:42:57", "remaining_time": "0:12:57", "throughput": 5558.73, "total_tokens": 14329600} +{"current_steps": 29110, "total_steps": 37885, "loss": 0.0001, "lr": 3.0950078997020214e-07, "epoch": 3.8418899300514715, "percentage": 76.84, "elapsed_time": "0:42:58", "remaining_time": "0:12:57", "throughput": 5558.94, "total_tokens": 14331968} +{"current_steps": 29115, "total_steps": 37885, "loss": 0.0719, "lr": 3.0916762503438e-07, "epoch": 3.84254982182922, "percentage": 76.85, "elapsed_time": "0:42:58", "remaining_time": "0:12:56", "throughput": 5559.28, "total_tokens": 14334720} +{"current_steps": 29120, "total_steps": 37885, "loss": 0.0002, "lr": 3.0883460672272724e-07, "epoch": 3.8432097136069685, "percentage": 76.86, "elapsed_time": "0:42:58", "remaining_time": "0:12:56", "throughput": 5559.48, "total_tokens": 14337088} +{"current_steps": 29125, "total_steps": 37885, "loss": 0.001, "lr": 3.0850173510592415e-07, "epoch": 3.843869605384717, "percentage": 76.88, "elapsed_time": "0:42:59", "remaining_time": "0:12:55", "throughput": 5559.61, "total_tokens": 14339264} +{"current_steps": 29130, "total_steps": 37885, "loss": 0.0442, "lr": 3.0816901025461974e-07, "epoch": 3.8445294971624655, "percentage": 76.89, "elapsed_time": "0:42:59", "remaining_time": "0:12:55", "throughput": 5559.8, "total_tokens": 14341632} +{"current_steps": 29135, "total_steps": 37885, "loss": 0.0, "lr": 3.0783643223943367e-07, "epoch": 3.845189388940214, "percentage": 76.9, "elapsed_time": "0:42:59", "remaining_time": "0:12:54", "throughput": 5559.95, "total_tokens": 14343872} +{"current_steps": 29140, "total_steps": 37885, "loss": 0.0003, "lr": 3.075040011309522e-07, "epoch": 3.845849280717962, "percentage": 76.92, "elapsed_time": "0:43:00", "remaining_time": "0:12:54", "throughput": 5560.14, "total_tokens": 14346240} +{"current_steps": 29145, "total_steps": 37885, "loss": 0.0001, "lr": 3.0717171699973197e-07, "epoch": 3.846509172495711, "percentage": 76.93, "elapsed_time": "0:43:00", "remaining_time": "0:12:53", "throughput": 5560.31, "total_tokens": 14348544} +{"current_steps": 29150, "total_steps": 37885, "loss": 0.0, "lr": 3.068395799162976e-07, "epoch": 3.847169064273459, "percentage": 76.94, "elapsed_time": "0:43:00", "remaining_time": "0:12:53", "throughput": 5560.45, "total_tokens": 14350784} +{"current_steps": 29155, "total_steps": 37885, "loss": 0.0, "lr": 3.0650758995114335e-07, "epoch": 3.847828956051208, "percentage": 76.96, "elapsed_time": "0:43:01", "remaining_time": "0:12:52", "throughput": 5560.74, "total_tokens": 14353408} +{"current_steps": 29160, "total_steps": 37885, "loss": 0.0, "lr": 3.061757471747313e-07, "epoch": 3.848488847828956, "percentage": 76.97, "elapsed_time": "0:43:01", "remaining_time": "0:12:52", "throughput": 5560.89, "total_tokens": 14355712} +{"current_steps": 29165, "total_steps": 37885, "loss": 0.0089, "lr": 3.058440516574918e-07, "epoch": 3.8491487396067043, "percentage": 76.98, "elapsed_time": "0:43:01", "remaining_time": "0:12:51", "throughput": 5561.06, "total_tokens": 14358016} +{"current_steps": 29170, "total_steps": 37885, "loss": 0.0337, "lr": 3.055125034698265e-07, "epoch": 3.849808631384453, "percentage": 77.0, "elapsed_time": "0:43:02", "remaining_time": "0:12:51", "throughput": 5561.31, "total_tokens": 14360576} +{"current_steps": 29175, "total_steps": 37885, "loss": 0.0004, "lr": 3.051811026821027e-07, "epoch": 3.8504685231622013, "percentage": 77.01, "elapsed_time": "0:43:02", "remaining_time": "0:12:51", "throughput": 5561.53, "total_tokens": 14363008} +{"current_steps": 29180, "total_steps": 37885, "loss": 0.0003, "lr": 3.04849849364659e-07, "epoch": 3.85112841493995, "percentage": 77.02, "elapsed_time": "0:43:02", "remaining_time": "0:12:50", "throughput": 5561.73, "total_tokens": 14365376} +{"current_steps": 29185, "total_steps": 37885, "loss": 0.0001, "lr": 3.045187435878003e-07, "epoch": 3.8517883067176983, "percentage": 77.04, "elapsed_time": "0:43:03", "remaining_time": "0:12:50", "throughput": 5561.96, "total_tokens": 14367872} +{"current_steps": 29190, "total_steps": 37885, "loss": 0.0001, "lr": 3.041877854218021e-07, "epoch": 3.8524481984954466, "percentage": 77.05, "elapsed_time": "0:43:03", "remaining_time": "0:12:49", "throughput": 5562.18, "total_tokens": 14370304} +{"current_steps": 29195, "total_steps": 37885, "loss": 0.0, "lr": 3.0385697493690807e-07, "epoch": 3.8531080902731953, "percentage": 77.06, "elapsed_time": "0:43:03", "remaining_time": "0:12:49", "throughput": 5562.48, "total_tokens": 14372928} +{"current_steps": 29200, "total_steps": 37885, "loss": 0.0004, "lr": 3.0352631220332945e-07, "epoch": 3.8537679820509436, "percentage": 77.08, "elapsed_time": "0:43:04", "remaining_time": "0:12:48", "throughput": 5562.71, "total_tokens": 14375360} +{"current_steps": 29205, "total_steps": 37885, "loss": 0.1616, "lr": 3.031957972912482e-07, "epoch": 3.8544278738286923, "percentage": 77.09, "elapsed_time": "0:43:04", "remaining_time": "0:12:48", "throughput": 5562.98, "total_tokens": 14377920} +{"current_steps": 29210, "total_steps": 37885, "loss": 0.0, "lr": 3.028654302708131e-07, "epoch": 3.8550877656064406, "percentage": 77.1, "elapsed_time": "0:43:04", "remaining_time": "0:12:47", "throughput": 5563.22, "total_tokens": 14380352} +{"current_steps": 29215, "total_steps": 37885, "loss": 0.0007, "lr": 3.025352112121419e-07, "epoch": 3.855747657384189, "percentage": 77.11, "elapsed_time": "0:43:05", "remaining_time": "0:12:47", "throughput": 5563.49, "total_tokens": 14382912} +{"current_steps": 29220, "total_steps": 37885, "loss": 0.0, "lr": 3.022051401853214e-07, "epoch": 3.856407549161937, "percentage": 77.13, "elapsed_time": "0:43:05", "remaining_time": "0:12:46", "throughput": 5563.73, "total_tokens": 14385344} +{"current_steps": 29225, "total_steps": 37885, "loss": 0.0001, "lr": 3.018752172604069e-07, "epoch": 3.857067440939686, "percentage": 77.14, "elapsed_time": "0:43:05", "remaining_time": "0:12:46", "throughput": 5564.0, "total_tokens": 14387840} +{"current_steps": 29230, "total_steps": 37885, "loss": 0.0002, "lr": 3.015454425074224e-07, "epoch": 3.857727332717434, "percentage": 77.15, "elapsed_time": "0:43:06", "remaining_time": "0:12:45", "throughput": 5564.14, "total_tokens": 14390016} +{"current_steps": 29235, "total_steps": 37885, "loss": 0.0, "lr": 3.0121581599635973e-07, "epoch": 3.858387224495183, "percentage": 77.17, "elapsed_time": "0:43:06", "remaining_time": "0:12:45", "throughput": 5564.35, "total_tokens": 14392384} +{"current_steps": 29240, "total_steps": 37885, "loss": 0.0, "lr": 3.0088633779717975e-07, "epoch": 3.859047116272931, "percentage": 77.18, "elapsed_time": "0:43:06", "remaining_time": "0:12:44", "throughput": 5564.56, "total_tokens": 14394752} +{"current_steps": 29245, "total_steps": 37885, "loss": 0.0004, "lr": 3.0055700797981244e-07, "epoch": 3.8597070080506795, "percentage": 77.19, "elapsed_time": "0:43:07", "remaining_time": "0:12:44", "throughput": 5564.79, "total_tokens": 14397184} +{"current_steps": 29250, "total_steps": 37885, "loss": 0.0, "lr": 3.002278266141548e-07, "epoch": 3.860366899828428, "percentage": 77.21, "elapsed_time": "0:43:07", "remaining_time": "0:12:43", "throughput": 5565.07, "total_tokens": 14399744} +{"current_steps": 29255, "total_steps": 37885, "loss": 0.0, "lr": 2.9989879377007375e-07, "epoch": 3.8610267916061765, "percentage": 77.22, "elapsed_time": "0:43:07", "remaining_time": "0:12:43", "throughput": 5565.29, "total_tokens": 14402112} +{"current_steps": 29260, "total_steps": 37885, "loss": 0.0, "lr": 2.995699095174041e-07, "epoch": 3.861686683383925, "percentage": 77.23, "elapsed_time": "0:43:08", "remaining_time": "0:12:42", "throughput": 5565.52, "total_tokens": 14404544} +{"current_steps": 29265, "total_steps": 37885, "loss": 0.0, "lr": 2.9924117392594893e-07, "epoch": 3.8623465751616735, "percentage": 77.25, "elapsed_time": "0:43:08", "remaining_time": "0:12:42", "throughput": 5565.66, "total_tokens": 14406720} +{"current_steps": 29270, "total_steps": 37885, "loss": 0.0, "lr": 2.9891258706547997e-07, "epoch": 3.8630064669394217, "percentage": 77.26, "elapsed_time": "0:43:08", "remaining_time": "0:12:41", "throughput": 5566.01, "total_tokens": 14409472} +{"current_steps": 29275, "total_steps": 37885, "loss": 0.0366, "lr": 2.9858414900573757e-07, "epoch": 3.8636663587171705, "percentage": 77.27, "elapsed_time": "0:43:09", "remaining_time": "0:12:41", "throughput": 5566.24, "total_tokens": 14411904} +{"current_steps": 29280, "total_steps": 37885, "loss": 0.0411, "lr": 2.9825585981643064e-07, "epoch": 3.8643262504949187, "percentage": 77.29, "elapsed_time": "0:43:09", "remaining_time": "0:12:41", "throughput": 5566.5, "total_tokens": 14414400} +{"current_steps": 29285, "total_steps": 37885, "loss": 0.0, "lr": 2.9792771956723537e-07, "epoch": 3.8649861422726675, "percentage": 77.3, "elapsed_time": "0:43:09", "remaining_time": "0:12:40", "throughput": 5566.75, "total_tokens": 14416896} +{"current_steps": 29290, "total_steps": 37885, "loss": 0.0, "lr": 2.9759972832779776e-07, "epoch": 3.8656460340504157, "percentage": 77.31, "elapsed_time": "0:43:10", "remaining_time": "0:12:40", "throughput": 5566.98, "total_tokens": 14419328} +{"current_steps": 29295, "total_steps": 37885, "loss": 0.0002, "lr": 2.972718861677317e-07, "epoch": 3.866305925828164, "percentage": 77.33, "elapsed_time": "0:43:10", "remaining_time": "0:12:39", "throughput": 5567.19, "total_tokens": 14421696} +{"current_steps": 29300, "total_steps": 37885, "loss": 0.0, "lr": 2.969441931566188e-07, "epoch": 3.8669658176059127, "percentage": 77.34, "elapsed_time": "0:43:10", "remaining_time": "0:12:39", "throughput": 5567.35, "total_tokens": 14423936} +{"current_steps": 29305, "total_steps": 37885, "loss": 0.0001, "lr": 2.9661664936400964e-07, "epoch": 3.867625709383661, "percentage": 77.35, "elapsed_time": "0:43:11", "remaining_time": "0:12:38", "throughput": 5567.61, "total_tokens": 14426432} +{"current_steps": 29310, "total_steps": 37885, "loss": 0.0, "lr": 2.9628925485942357e-07, "epoch": 3.8682856011614097, "percentage": 77.37, "elapsed_time": "0:43:11", "remaining_time": "0:12:38", "throughput": 5567.77, "total_tokens": 14428672} +{"current_steps": 29315, "total_steps": 37885, "loss": 0.0001, "lr": 2.9596200971234687e-07, "epoch": 3.868945492939158, "percentage": 77.38, "elapsed_time": "0:43:11", "remaining_time": "0:12:37", "throughput": 5567.98, "total_tokens": 14431040} +{"current_steps": 29320, "total_steps": 37885, "loss": 0.0657, "lr": 2.956349139922357e-07, "epoch": 3.8696053847169063, "percentage": 77.39, "elapsed_time": "0:43:12", "remaining_time": "0:12:37", "throughput": 5568.22, "total_tokens": 14433472} +{"current_steps": 29325, "total_steps": 37885, "loss": 0.0001, "lr": 2.9530796776851283e-07, "epoch": 3.870265276494655, "percentage": 77.41, "elapsed_time": "0:43:12", "remaining_time": "0:12:36", "throughput": 5568.48, "total_tokens": 14435968} +{"current_steps": 29330, "total_steps": 37885, "loss": 0.0, "lr": 2.9498117111057155e-07, "epoch": 3.8709251682724033, "percentage": 77.42, "elapsed_time": "0:43:12", "remaining_time": "0:12:36", "throughput": 5568.69, "total_tokens": 14438336} +{"current_steps": 29335, "total_steps": 37885, "loss": 0.0001, "lr": 2.9465452408777126e-07, "epoch": 3.871585060050152, "percentage": 77.43, "elapsed_time": "0:43:13", "remaining_time": "0:12:35", "throughput": 5568.96, "total_tokens": 14440896} +{"current_steps": 29340, "total_steps": 37885, "loss": 0.0, "lr": 2.943280267694399e-07, "epoch": 3.8722449518279003, "percentage": 77.44, "elapsed_time": "0:43:13", "remaining_time": "0:12:35", "throughput": 5569.22, "total_tokens": 14443392} +{"current_steps": 29345, "total_steps": 37885, "loss": 0.0009, "lr": 2.940016792248754e-07, "epoch": 3.8729048436056486, "percentage": 77.46, "elapsed_time": "0:43:13", "remaining_time": "0:12:34", "throughput": 5569.5, "total_tokens": 14445952} +{"current_steps": 29350, "total_steps": 37885, "loss": 0.0611, "lr": 2.936754815233417e-07, "epoch": 3.873564735383397, "percentage": 77.47, "elapsed_time": "0:43:14", "remaining_time": "0:12:34", "throughput": 5569.69, "total_tokens": 14448256} +{"current_steps": 29355, "total_steps": 37885, "loss": 0.0, "lr": 2.933494337340726e-07, "epoch": 3.8742246271611456, "percentage": 77.48, "elapsed_time": "0:43:14", "remaining_time": "0:12:33", "throughput": 5569.9, "total_tokens": 14450624} +{"current_steps": 29360, "total_steps": 37885, "loss": 0.0, "lr": 2.930235359262687e-07, "epoch": 3.8748845189388943, "percentage": 77.5, "elapsed_time": "0:43:14", "remaining_time": "0:12:33", "throughput": 5570.13, "total_tokens": 14453056} +{"current_steps": 29365, "total_steps": 37885, "loss": 0.0, "lr": 2.9269778816909985e-07, "epoch": 3.8755444107166426, "percentage": 77.51, "elapsed_time": "0:43:15", "remaining_time": "0:12:32", "throughput": 5570.41, "total_tokens": 14455616} +{"current_steps": 29370, "total_steps": 37885, "loss": 0.0, "lr": 2.9237219053170383e-07, "epoch": 3.876204302494391, "percentage": 77.52, "elapsed_time": "0:43:15", "remaining_time": "0:12:32", "throughput": 5570.54, "total_tokens": 14457792} +{"current_steps": 29375, "total_steps": 37885, "loss": 0.0, "lr": 2.920467430831858e-07, "epoch": 3.876864194272139, "percentage": 77.54, "elapsed_time": "0:43:15", "remaining_time": "0:12:31", "throughput": 5570.73, "total_tokens": 14460096} +{"current_steps": 29380, "total_steps": 37885, "loss": 0.0019, "lr": 2.917214458926199e-07, "epoch": 3.877524086049888, "percentage": 77.55, "elapsed_time": "0:43:16", "remaining_time": "0:12:31", "throughput": 5571.14, "total_tokens": 14463040} +{"current_steps": 29385, "total_steps": 37885, "loss": 0.0, "lr": 2.913962990290486e-07, "epoch": 3.878183977827636, "percentage": 77.56, "elapsed_time": "0:43:16", "remaining_time": "0:12:31", "throughput": 5571.37, "total_tokens": 14465472} +{"current_steps": 29390, "total_steps": 37885, "loss": 0.0891, "lr": 2.910713025614812e-07, "epoch": 3.878843869605385, "percentage": 77.58, "elapsed_time": "0:43:16", "remaining_time": "0:12:30", "throughput": 5571.63, "total_tokens": 14467968} +{"current_steps": 29395, "total_steps": 37885, "loss": 0.0, "lr": 2.9074645655889604e-07, "epoch": 3.879503761383133, "percentage": 77.59, "elapsed_time": "0:43:17", "remaining_time": "0:12:30", "throughput": 5571.95, "total_tokens": 14470656} +{"current_steps": 29400, "total_steps": 37885, "loss": 0.0004, "lr": 2.904217610902396e-07, "epoch": 3.8801636531608814, "percentage": 77.6, "elapsed_time": "0:43:17", "remaining_time": "0:12:29", "throughput": 5572.04, "total_tokens": 14472704} +{"current_steps": 29405, "total_steps": 37885, "loss": 0.0, "lr": 2.900972162244263e-07, "epoch": 3.88082354493863, "percentage": 77.62, "elapsed_time": "0:43:17", "remaining_time": "0:12:29", "throughput": 5572.27, "total_tokens": 14475136} +{"current_steps": 29410, "total_steps": 37885, "loss": 0.0491, "lr": 2.897728220303378e-07, "epoch": 3.8814834367163784, "percentage": 77.63, "elapsed_time": "0:43:18", "remaining_time": "0:12:28", "throughput": 5572.48, "total_tokens": 14477504} +{"current_steps": 29415, "total_steps": 37885, "loss": 0.0239, "lr": 2.894485785768248e-07, "epoch": 3.882143328494127, "percentage": 77.64, "elapsed_time": "0:43:18", "remaining_time": "0:12:28", "throughput": 5572.71, "total_tokens": 14479936} +{"current_steps": 29420, "total_steps": 37885, "loss": 0.1459, "lr": 2.891244859327059e-07, "epoch": 3.8828032202718754, "percentage": 77.66, "elapsed_time": "0:43:18", "remaining_time": "0:12:27", "throughput": 5572.94, "total_tokens": 14482368} +{"current_steps": 29425, "total_steps": 37885, "loss": 0.0, "lr": 2.888005441667668e-07, "epoch": 3.8834631120496237, "percentage": 77.67, "elapsed_time": "0:43:19", "remaining_time": "0:12:27", "throughput": 5573.14, "total_tokens": 14484736} +{"current_steps": 29430, "total_steps": 37885, "loss": 0.0049, "lr": 2.88476753347762e-07, "epoch": 3.8841230038273724, "percentage": 77.68, "elapsed_time": "0:43:19", "remaining_time": "0:12:26", "throughput": 5573.42, "total_tokens": 14487296} +{"current_steps": 29435, "total_steps": 37885, "loss": 0.0, "lr": 2.881531135444143e-07, "epoch": 3.8847828956051207, "percentage": 77.7, "elapsed_time": "0:43:19", "remaining_time": "0:12:26", "throughput": 5573.5, "total_tokens": 14489344} +{"current_steps": 29440, "total_steps": 37885, "loss": 0.0386, "lr": 2.878296248254131e-07, "epoch": 3.8854427873828694, "percentage": 77.71, "elapsed_time": "0:43:20", "remaining_time": "0:12:25", "throughput": 5573.84, "total_tokens": 14492096} +{"current_steps": 29445, "total_steps": 37885, "loss": 0.0065, "lr": 2.8750628725941685e-07, "epoch": 3.8861026791606177, "percentage": 77.72, "elapsed_time": "0:43:20", "remaining_time": "0:12:25", "throughput": 5574.13, "total_tokens": 14494720} +{"current_steps": 29450, "total_steps": 37885, "loss": 0.0, "lr": 2.8718310091505173e-07, "epoch": 3.886762570938366, "percentage": 77.74, "elapsed_time": "0:43:20", "remaining_time": "0:12:24", "throughput": 5574.41, "total_tokens": 14497280} +{"current_steps": 29455, "total_steps": 37885, "loss": 0.0001, "lr": 2.8686006586091183e-07, "epoch": 3.8874224627161147, "percentage": 77.75, "elapsed_time": "0:43:21", "remaining_time": "0:12:24", "throughput": 5574.7, "total_tokens": 14499904} +{"current_steps": 29460, "total_steps": 37885, "loss": 0.0854, "lr": 2.8653718216555854e-07, "epoch": 3.888082354493863, "percentage": 77.76, "elapsed_time": "0:43:21", "remaining_time": "0:12:23", "throughput": 5575.07, "total_tokens": 14502784} +{"current_steps": 29465, "total_steps": 37885, "loss": 0.0, "lr": 2.8621444989752184e-07, "epoch": 3.8887422462716117, "percentage": 77.77, "elapsed_time": "0:43:21", "remaining_time": "0:12:23", "throughput": 5575.2, "total_tokens": 14504960} +{"current_steps": 29470, "total_steps": 37885, "loss": 0.0, "lr": 2.858918691252997e-07, "epoch": 3.88940213804936, "percentage": 77.79, "elapsed_time": "0:43:22", "remaining_time": "0:12:22", "throughput": 5575.48, "total_tokens": 14507520} +{"current_steps": 29475, "total_steps": 37885, "loss": 0.0, "lr": 2.855694399173568e-07, "epoch": 3.8900620298271082, "percentage": 77.8, "elapsed_time": "0:43:22", "remaining_time": "0:12:22", "throughput": 5575.73, "total_tokens": 14510016} +{"current_steps": 29480, "total_steps": 37885, "loss": 0.0009, "lr": 2.8524716234212684e-07, "epoch": 3.890721921604857, "percentage": 77.81, "elapsed_time": "0:43:22", "remaining_time": "0:12:22", "throughput": 5575.98, "total_tokens": 14512512} +{"current_steps": 29485, "total_steps": 37885, "loss": 0.0, "lr": 2.849250364680108e-07, "epoch": 3.8913818133826052, "percentage": 77.83, "elapsed_time": "0:43:23", "remaining_time": "0:12:21", "throughput": 5576.09, "total_tokens": 14514624} +{"current_steps": 29490, "total_steps": 37885, "loss": 0.0, "lr": 2.846030623633778e-07, "epoch": 3.892041705160354, "percentage": 77.84, "elapsed_time": "0:43:23", "remaining_time": "0:12:21", "throughput": 5576.28, "total_tokens": 14516928} +{"current_steps": 29495, "total_steps": 37885, "loss": 0.0, "lr": 2.842812400965645e-07, "epoch": 3.8927015969381022, "percentage": 77.85, "elapsed_time": "0:43:23", "remaining_time": "0:12:20", "throughput": 5576.49, "total_tokens": 14519296} +{"current_steps": 29500, "total_steps": 37885, "loss": 0.1298, "lr": 2.839595697358744e-07, "epoch": 3.8933614887158505, "percentage": 77.87, "elapsed_time": "0:43:23", "remaining_time": "0:12:20", "throughput": 5576.72, "total_tokens": 14521728} +{"current_steps": 29505, "total_steps": 37885, "loss": 0.0023, "lr": 2.836380513495812e-07, "epoch": 3.894021380493599, "percentage": 77.88, "elapsed_time": "0:43:24", "remaining_time": "0:12:19", "throughput": 5576.98, "total_tokens": 14524224} +{"current_steps": 29510, "total_steps": 37885, "loss": 0.0001, "lr": 2.8331668500592374e-07, "epoch": 3.8946812722713475, "percentage": 77.89, "elapsed_time": "0:43:24", "remaining_time": "0:12:19", "throughput": 5577.3, "total_tokens": 14526912} +{"current_steps": 29515, "total_steps": 37885, "loss": 0.0003, "lr": 2.829954707731104e-07, "epoch": 3.895341164049096, "percentage": 77.91, "elapsed_time": "0:43:24", "remaining_time": "0:12:18", "throughput": 5577.51, "total_tokens": 14529280} +{"current_steps": 29520, "total_steps": 37885, "loss": 0.0008, "lr": 2.826744087193159e-07, "epoch": 3.8960010558268445, "percentage": 77.92, "elapsed_time": "0:43:25", "remaining_time": "0:12:18", "throughput": 5577.76, "total_tokens": 14531776} +{"current_steps": 29525, "total_steps": 37885, "loss": 0.0009, "lr": 2.823534989126838e-07, "epoch": 3.896660947604593, "percentage": 77.93, "elapsed_time": "0:43:25", "remaining_time": "0:12:17", "throughput": 5577.89, "total_tokens": 14533952} +{"current_steps": 29530, "total_steps": 37885, "loss": 0.0, "lr": 2.820327414213249e-07, "epoch": 3.897320839382341, "percentage": 77.95, "elapsed_time": "0:43:25", "remaining_time": "0:12:17", "throughput": 5578.03, "total_tokens": 14536128} +{"current_steps": 29535, "total_steps": 37885, "loss": 0.024, "lr": 2.8171213631331714e-07, "epoch": 3.89798073116009, "percentage": 77.96, "elapsed_time": "0:43:26", "remaining_time": "0:12:16", "throughput": 5578.44, "total_tokens": 14539072} +{"current_steps": 29540, "total_steps": 37885, "loss": 0.0002, "lr": 2.813916836567074e-07, "epoch": 3.898640622937838, "percentage": 77.97, "elapsed_time": "0:43:26", "remaining_time": "0:12:16", "throughput": 5578.71, "total_tokens": 14541632} +{"current_steps": 29545, "total_steps": 37885, "loss": 0.2078, "lr": 2.810713835195092e-07, "epoch": 3.899300514715587, "percentage": 77.99, "elapsed_time": "0:43:26", "remaining_time": "0:12:15", "throughput": 5578.8, "total_tokens": 14543680} +{"current_steps": 29550, "total_steps": 37885, "loss": 0.0, "lr": 2.807512359697034e-07, "epoch": 3.899960406493335, "percentage": 78.0, "elapsed_time": "0:43:27", "remaining_time": "0:12:15", "throughput": 5579.01, "total_tokens": 14546048} +{"current_steps": 29555, "total_steps": 37885, "loss": 0.0412, "lr": 2.8043124107523943e-07, "epoch": 3.9006202982710834, "percentage": 78.01, "elapsed_time": "0:43:27", "remaining_time": "0:12:14", "throughput": 5579.24, "total_tokens": 14548480} +{"current_steps": 29560, "total_steps": 37885, "loss": 0.0004, "lr": 2.801113989040338e-07, "epoch": 3.901280190048832, "percentage": 78.03, "elapsed_time": "0:43:27", "remaining_time": "0:12:14", "throughput": 5579.5, "total_tokens": 14550976} +{"current_steps": 29565, "total_steps": 37885, "loss": 0.0, "lr": 2.7979170952397103e-07, "epoch": 3.9019400818265804, "percentage": 78.04, "elapsed_time": "0:43:28", "remaining_time": "0:12:14", "throughput": 5579.79, "total_tokens": 14553600} +{"current_steps": 29570, "total_steps": 37885, "loss": 0.0056, "lr": 2.7947217300290225e-07, "epoch": 3.902599973604329, "percentage": 78.05, "elapsed_time": "0:43:28", "remaining_time": "0:12:13", "throughput": 5580.07, "total_tokens": 14556160} +{"current_steps": 29575, "total_steps": 37885, "loss": 0.0337, "lr": 2.791527894086472e-07, "epoch": 3.9032598653820774, "percentage": 78.07, "elapsed_time": "0:43:28", "remaining_time": "0:12:13", "throughput": 5580.41, "total_tokens": 14558912} +{"current_steps": 29580, "total_steps": 37885, "loss": 0.002, "lr": 2.7883355880899286e-07, "epoch": 3.9039197571598256, "percentage": 78.08, "elapsed_time": "0:43:29", "remaining_time": "0:12:12", "throughput": 5580.67, "total_tokens": 14561408} +{"current_steps": 29585, "total_steps": 37885, "loss": 0.0005, "lr": 2.78514481271693e-07, "epoch": 3.9045796489375744, "percentage": 78.09, "elapsed_time": "0:43:29", "remaining_time": "0:12:12", "throughput": 5580.83, "total_tokens": 14563648} +{"current_steps": 29590, "total_steps": 37885, "loss": 0.0, "lr": 2.7819555686447004e-07, "epoch": 3.9052395407153226, "percentage": 78.1, "elapsed_time": "0:43:29", "remaining_time": "0:12:11", "throughput": 5580.99, "total_tokens": 14565888} +{"current_steps": 29595, "total_steps": 37885, "loss": 0.008, "lr": 2.7787678565501347e-07, "epoch": 3.9058994324930714, "percentage": 78.12, "elapsed_time": "0:43:30", "remaining_time": "0:12:11", "throughput": 5581.24, "total_tokens": 14568384} +{"current_steps": 29600, "total_steps": 37885, "loss": 0.0, "lr": 2.7755816771097963e-07, "epoch": 3.9065593242708196, "percentage": 78.13, "elapsed_time": "0:43:30", "remaining_time": "0:12:10", "throughput": 5581.33, "total_tokens": 14570432} +{"current_steps": 29605, "total_steps": 37885, "loss": 0.0154, "lr": 2.7723970309999324e-07, "epoch": 3.907219216048568, "percentage": 78.14, "elapsed_time": "0:43:30", "remaining_time": "0:12:10", "throughput": 5581.56, "total_tokens": 14572864} +{"current_steps": 29610, "total_steps": 37885, "loss": 0.0, "lr": 2.7692139188964594e-07, "epoch": 3.9078791078263166, "percentage": 78.16, "elapsed_time": "0:43:31", "remaining_time": "0:12:09", "throughput": 5581.73, "total_tokens": 14575104} +{"current_steps": 29615, "total_steps": 37885, "loss": 0.0083, "lr": 2.766032341474975e-07, "epoch": 3.908538999604065, "percentage": 78.17, "elapsed_time": "0:43:31", "remaining_time": "0:12:09", "throughput": 5582.0, "total_tokens": 14577664} +{"current_steps": 29620, "total_steps": 37885, "loss": 0.0004, "lr": 2.762852299410738e-07, "epoch": 3.9091988913818136, "percentage": 78.18, "elapsed_time": "0:43:31", "remaining_time": "0:12:08", "throughput": 5582.32, "total_tokens": 14580352} +{"current_steps": 29625, "total_steps": 37885, "loss": 0.0, "lr": 2.759673793378694e-07, "epoch": 3.909858783159562, "percentage": 78.2, "elapsed_time": "0:43:32", "remaining_time": "0:12:08", "throughput": 5582.55, "total_tokens": 14582784} +{"current_steps": 29630, "total_steps": 37885, "loss": 0.0001, "lr": 2.7564968240534594e-07, "epoch": 3.91051867493731, "percentage": 78.21, "elapsed_time": "0:43:32", "remaining_time": "0:12:07", "throughput": 5582.77, "total_tokens": 14585216} +{"current_steps": 29635, "total_steps": 37885, "loss": 0.0611, "lr": 2.753321392109318e-07, "epoch": 3.9111785667150585, "percentage": 78.22, "elapsed_time": "0:43:32", "remaining_time": "0:12:07", "throughput": 5582.98, "total_tokens": 14587584} +{"current_steps": 29640, "total_steps": 37885, "loss": 0.0001, "lr": 2.7501474982202345e-07, "epoch": 3.911838458492807, "percentage": 78.24, "elapsed_time": "0:43:33", "remaining_time": "0:12:06", "throughput": 5583.19, "total_tokens": 14589952} +{"current_steps": 29645, "total_steps": 37885, "loss": 0.0001, "lr": 2.7469751430598486e-07, "epoch": 3.9124983502705555, "percentage": 78.25, "elapsed_time": "0:43:33", "remaining_time": "0:12:06", "throughput": 5583.4, "total_tokens": 14592320} +{"current_steps": 29650, "total_steps": 37885, "loss": 0.0266, "lr": 2.743804327301462e-07, "epoch": 3.913158242048304, "percentage": 78.26, "elapsed_time": "0:43:33", "remaining_time": "0:12:05", "throughput": 5583.56, "total_tokens": 14594560} +{"current_steps": 29655, "total_steps": 37885, "loss": 0.0725, "lr": 2.7406350516180666e-07, "epoch": 3.9138181338260525, "percentage": 78.28, "elapsed_time": "0:43:34", "remaining_time": "0:12:05", "throughput": 5583.89, "total_tokens": 14597248} +{"current_steps": 29660, "total_steps": 37885, "loss": 0.0, "lr": 2.7374673166823057e-07, "epoch": 3.9144780256038008, "percentage": 78.29, "elapsed_time": "0:43:34", "remaining_time": "0:12:05", "throughput": 5584.04, "total_tokens": 14599488} +{"current_steps": 29665, "total_steps": 37885, "loss": 0.0, "lr": 2.7343011231665227e-07, "epoch": 3.9151379173815495, "percentage": 78.3, "elapsed_time": "0:43:34", "remaining_time": "0:12:04", "throughput": 5584.2, "total_tokens": 14601728} +{"current_steps": 29670, "total_steps": 37885, "loss": 0.0, "lr": 2.731136471742712e-07, "epoch": 3.9157978091592978, "percentage": 78.32, "elapsed_time": "0:43:35", "remaining_time": "0:12:04", "throughput": 5584.43, "total_tokens": 14604160} +{"current_steps": 29675, "total_steps": 37885, "loss": 0.0, "lr": 2.7279733630825417e-07, "epoch": 3.9164577009370465, "percentage": 78.33, "elapsed_time": "0:43:35", "remaining_time": "0:12:03", "throughput": 5584.66, "total_tokens": 14606592} +{"current_steps": 29680, "total_steps": 37885, "loss": 0.001, "lr": 2.7248117978573725e-07, "epoch": 3.9171175927147948, "percentage": 78.34, "elapsed_time": "0:43:35", "remaining_time": "0:12:03", "throughput": 5584.88, "total_tokens": 14609024} +{"current_steps": 29685, "total_steps": 37885, "loss": 0.1096, "lr": 2.721651776738212e-07, "epoch": 3.917777484492543, "percentage": 78.36, "elapsed_time": "0:43:36", "remaining_time": "0:12:02", "throughput": 5585.09, "total_tokens": 14611392} +{"current_steps": 29690, "total_steps": 37885, "loss": 0.0004, "lr": 2.71849330039576e-07, "epoch": 3.9184373762702918, "percentage": 78.37, "elapsed_time": "0:43:36", "remaining_time": "0:12:02", "throughput": 5585.3, "total_tokens": 14613760} +{"current_steps": 29695, "total_steps": 37885, "loss": 0.0, "lr": 2.715336369500374e-07, "epoch": 3.91909726804804, "percentage": 78.38, "elapsed_time": "0:43:36", "remaining_time": "0:12:01", "throughput": 5585.5, "total_tokens": 14616128} +{"current_steps": 29700, "total_steps": 37885, "loss": 0.0441, "lr": 2.712180984722091e-07, "epoch": 3.9197571598257888, "percentage": 78.4, "elapsed_time": "0:43:37", "remaining_time": "0:12:01", "throughput": 5585.82, "total_tokens": 14618816} +{"current_steps": 29705, "total_steps": 37885, "loss": 0.0002, "lr": 2.7090271467306235e-07, "epoch": 3.920417051603537, "percentage": 78.41, "elapsed_time": "0:43:37", "remaining_time": "0:12:00", "throughput": 5586.04, "total_tokens": 14621184} +{"current_steps": 29710, "total_steps": 37885, "loss": 0.0, "lr": 2.705874856195344e-07, "epoch": 3.9210769433812853, "percentage": 78.42, "elapsed_time": "0:43:37", "remaining_time": "0:12:00", "throughput": 5586.37, "total_tokens": 14623936} +{"current_steps": 29715, "total_steps": 37885, "loss": 0.0797, "lr": 2.702724113785305e-07, "epoch": 3.921736835159034, "percentage": 78.43, "elapsed_time": "0:43:38", "remaining_time": "0:11:59", "throughput": 5586.53, "total_tokens": 14626176} +{"current_steps": 29720, "total_steps": 37885, "loss": 0.0506, "lr": 2.6995749201692353e-07, "epoch": 3.9223967269367823, "percentage": 78.45, "elapsed_time": "0:43:38", "remaining_time": "0:11:59", "throughput": 5586.77, "total_tokens": 14628608} +{"current_steps": 29725, "total_steps": 37885, "loss": 0.0011, "lr": 2.696427276015518e-07, "epoch": 3.923056618714531, "percentage": 78.46, "elapsed_time": "0:43:38", "remaining_time": "0:11:58", "throughput": 5587.12, "total_tokens": 14631424} +{"current_steps": 29730, "total_steps": 37885, "loss": 0.0049, "lr": 2.693281181992225e-07, "epoch": 3.9237165104922793, "percentage": 78.47, "elapsed_time": "0:43:39", "remaining_time": "0:11:58", "throughput": 5587.33, "total_tokens": 14633792} +{"current_steps": 29735, "total_steps": 37885, "loss": 0.0009, "lr": 2.6901366387670885e-07, "epoch": 3.9243764022700276, "percentage": 78.49, "elapsed_time": "0:43:39", "remaining_time": "0:11:57", "throughput": 5587.61, "total_tokens": 14636352} +{"current_steps": 29740, "total_steps": 37885, "loss": 0.0001, "lr": 2.6869936470075214e-07, "epoch": 3.9250362940477763, "percentage": 78.5, "elapsed_time": "0:43:39", "remaining_time": "0:11:57", "throughput": 5587.84, "total_tokens": 14638784} +{"current_steps": 29745, "total_steps": 37885, "loss": 0.0, "lr": 2.6838522073805915e-07, "epoch": 3.9256961858255246, "percentage": 78.51, "elapsed_time": "0:43:40", "remaining_time": "0:11:57", "throughput": 5588.13, "total_tokens": 14641408} +{"current_steps": 29750, "total_steps": 37885, "loss": 0.0, "lr": 2.6807123205530523e-07, "epoch": 3.9263560776032733, "percentage": 78.53, "elapsed_time": "0:43:40", "remaining_time": "0:11:56", "throughput": 5588.31, "total_tokens": 14643712} +{"current_steps": 29755, "total_steps": 37885, "loss": 0.0412, "lr": 2.677573987191323e-07, "epoch": 3.9270159693810216, "percentage": 78.54, "elapsed_time": "0:43:40", "remaining_time": "0:11:56", "throughput": 5588.61, "total_tokens": 14646336} +{"current_steps": 29760, "total_steps": 37885, "loss": 0.0008, "lr": 2.674437207961487e-07, "epoch": 3.92767586115877, "percentage": 78.55, "elapsed_time": "0:43:41", "remaining_time": "0:11:55", "throughput": 5588.86, "total_tokens": 14648832} +{"current_steps": 29765, "total_steps": 37885, "loss": 0.0, "lr": 2.671301983529307e-07, "epoch": 3.928335752936518, "percentage": 78.57, "elapsed_time": "0:43:41", "remaining_time": "0:11:55", "throughput": 5589.04, "total_tokens": 14651136} +{"current_steps": 29770, "total_steps": 37885, "loss": 0.0823, "lr": 2.668168314560213e-07, "epoch": 3.928995644714267, "percentage": 78.58, "elapsed_time": "0:43:41", "remaining_time": "0:11:54", "throughput": 5589.26, "total_tokens": 14653568} +{"current_steps": 29775, "total_steps": 37885, "loss": 0.0239, "lr": 2.6650362017192986e-07, "epoch": 3.929655536492015, "percentage": 78.59, "elapsed_time": "0:43:42", "remaining_time": "0:11:54", "throughput": 5589.5, "total_tokens": 14656000} +{"current_steps": 29780, "total_steps": 37885, "loss": 0.0001, "lr": 2.661905645671335e-07, "epoch": 3.930315428269764, "percentage": 78.61, "elapsed_time": "0:43:42", "remaining_time": "0:11:53", "throughput": 5589.72, "total_tokens": 14658432} +{"current_steps": 29785, "total_steps": 37885, "loss": 0.0035, "lr": 2.658776647080759e-07, "epoch": 3.930975320047512, "percentage": 78.62, "elapsed_time": "0:43:42", "remaining_time": "0:11:53", "throughput": 5590.02, "total_tokens": 14661056} +{"current_steps": 29790, "total_steps": 37885, "loss": 0.0337, "lr": 2.655649206611683e-07, "epoch": 3.9316352118252604, "percentage": 78.63, "elapsed_time": "0:43:43", "remaining_time": "0:11:52", "throughput": 5590.2, "total_tokens": 14663360} +{"current_steps": 29795, "total_steps": 37885, "loss": 0.0002, "lr": 2.652523324927876e-07, "epoch": 3.932295103603009, "percentage": 78.65, "elapsed_time": "0:43:43", "remaining_time": "0:11:52", "throughput": 5590.45, "total_tokens": 14665856} +{"current_steps": 29800, "total_steps": 37885, "loss": 0.0, "lr": 2.649399002692786e-07, "epoch": 3.9329549953807574, "percentage": 78.66, "elapsed_time": "0:43:43", "remaining_time": "0:11:51", "throughput": 5590.65, "total_tokens": 14668224} +{"current_steps": 29805, "total_steps": 37885, "loss": 0.0, "lr": 2.6462762405695314e-07, "epoch": 3.933614887158506, "percentage": 78.67, "elapsed_time": "0:43:44", "remaining_time": "0:11:51", "throughput": 5590.81, "total_tokens": 14670464} +{"current_steps": 29810, "total_steps": 37885, "loss": 0.0352, "lr": 2.6431550392208924e-07, "epoch": 3.9342747789362544, "percentage": 78.69, "elapsed_time": "0:43:44", "remaining_time": "0:11:50", "throughput": 5591.11, "total_tokens": 14673088} +{"current_steps": 29815, "total_steps": 37885, "loss": 0.0, "lr": 2.6400353993093205e-07, "epoch": 3.9349346707140027, "percentage": 78.7, "elapsed_time": "0:43:44", "remaining_time": "0:11:50", "throughput": 5591.36, "total_tokens": 14675584} +{"current_steps": 29820, "total_steps": 37885, "loss": 0.0, "lr": 2.636917321496939e-07, "epoch": 3.9355945624917514, "percentage": 78.71, "elapsed_time": "0:43:45", "remaining_time": "0:11:49", "throughput": 5591.69, "total_tokens": 14678336} +{"current_steps": 29825, "total_steps": 37885, "loss": 0.0, "lr": 2.6338008064455395e-07, "epoch": 3.9362544542694997, "percentage": 78.73, "elapsed_time": "0:43:45", "remaining_time": "0:11:49", "throughput": 5591.96, "total_tokens": 14680896} +{"current_steps": 29830, "total_steps": 37885, "loss": 0.0008, "lr": 2.6306858548165776e-07, "epoch": 3.9369143460472484, "percentage": 78.74, "elapsed_time": "0:43:45", "remaining_time": "0:11:49", "throughput": 5592.14, "total_tokens": 14683200} +{"current_steps": 29835, "total_steps": 37885, "loss": 0.0, "lr": 2.627572467271172e-07, "epoch": 3.9375742378249967, "percentage": 78.75, "elapsed_time": "0:43:46", "remaining_time": "0:11:48", "throughput": 5592.41, "total_tokens": 14685760} +{"current_steps": 29840, "total_steps": 37885, "loss": 0.0, "lr": 2.62446064447013e-07, "epoch": 3.938234129602745, "percentage": 78.76, "elapsed_time": "0:43:46", "remaining_time": "0:11:48", "throughput": 5592.66, "total_tokens": 14688256} +{"current_steps": 29845, "total_steps": 37885, "loss": 0.0617, "lr": 2.621350387073903e-07, "epoch": 3.9388940213804937, "percentage": 78.78, "elapsed_time": "0:43:46", "remaining_time": "0:11:47", "throughput": 5592.82, "total_tokens": 14690496} +{"current_steps": 29850, "total_steps": 37885, "loss": 0.0, "lr": 2.618241695742628e-07, "epoch": 3.939553913158242, "percentage": 78.79, "elapsed_time": "0:43:47", "remaining_time": "0:11:47", "throughput": 5593.06, "total_tokens": 14692992} +{"current_steps": 29855, "total_steps": 37885, "loss": 0.0001, "lr": 2.615134571136095e-07, "epoch": 3.9402138049359907, "percentage": 78.8, "elapsed_time": "0:43:47", "remaining_time": "0:11:46", "throughput": 5593.2, "total_tokens": 14695168} +{"current_steps": 29860, "total_steps": 37885, "loss": 0.1172, "lr": 2.6120290139137726e-07, "epoch": 3.940873696713739, "percentage": 78.82, "elapsed_time": "0:43:47", "remaining_time": "0:11:46", "throughput": 5593.45, "total_tokens": 14697664} +{"current_steps": 29865, "total_steps": 37885, "loss": 0.0, "lr": 2.608925024734795e-07, "epoch": 3.9415335884914873, "percentage": 78.83, "elapsed_time": "0:43:47", "remaining_time": "0:11:45", "throughput": 5593.8, "total_tokens": 14700480} +{"current_steps": 29870, "total_steps": 37885, "loss": 0.0001, "lr": 2.605822604257953e-07, "epoch": 3.942193480269236, "percentage": 78.84, "elapsed_time": "0:43:48", "remaining_time": "0:11:45", "throughput": 5594.16, "total_tokens": 14703296} +{"current_steps": 29875, "total_steps": 37885, "loss": 0.0, "lr": 2.6027217531417256e-07, "epoch": 3.9428533720469843, "percentage": 78.86, "elapsed_time": "0:43:48", "remaining_time": "0:11:44", "throughput": 5594.27, "total_tokens": 14705408} +{"current_steps": 29880, "total_steps": 37885, "loss": 0.0001, "lr": 2.5996224720442394e-07, "epoch": 3.943513263824733, "percentage": 78.87, "elapsed_time": "0:43:48", "remaining_time": "0:11:44", "throughput": 5594.45, "total_tokens": 14707712} +{"current_steps": 29885, "total_steps": 37885, "loss": 0.0, "lr": 2.59652476162329e-07, "epoch": 3.9441731556024813, "percentage": 78.88, "elapsed_time": "0:43:49", "remaining_time": "0:11:43", "throughput": 5594.7, "total_tokens": 14710208} +{"current_steps": 29890, "total_steps": 37885, "loss": 0.0001, "lr": 2.593428622536349e-07, "epoch": 3.9448330473802296, "percentage": 78.9, "elapsed_time": "0:43:49", "remaining_time": "0:11:43", "throughput": 5594.92, "total_tokens": 14712640} +{"current_steps": 29895, "total_steps": 37885, "loss": 0.061, "lr": 2.5903340554405485e-07, "epoch": 3.945492939157978, "percentage": 78.91, "elapsed_time": "0:43:49", "remaining_time": "0:11:42", "throughput": 5595.17, "total_tokens": 14715136} +{"current_steps": 29900, "total_steps": 37885, "loss": 0.0, "lr": 2.587241060992691e-07, "epoch": 3.9461528309357266, "percentage": 78.92, "elapsed_time": "0:43:50", "remaining_time": "0:11:42", "throughput": 5595.48, "total_tokens": 14717824} +{"current_steps": 29905, "total_steps": 37885, "loss": 0.0, "lr": 2.5841496398492366e-07, "epoch": 3.946812722713475, "percentage": 78.94, "elapsed_time": "0:43:50", "remaining_time": "0:11:41", "throughput": 5595.73, "total_tokens": 14720320} +{"current_steps": 29910, "total_steps": 37885, "loss": 0.0, "lr": 2.5810597926663205e-07, "epoch": 3.9474726144912236, "percentage": 78.95, "elapsed_time": "0:43:50", "remaining_time": "0:11:41", "throughput": 5595.93, "total_tokens": 14722688} +{"current_steps": 29915, "total_steps": 37885, "loss": 0.1273, "lr": 2.577971520099741e-07, "epoch": 3.948132506268972, "percentage": 78.96, "elapsed_time": "0:43:51", "remaining_time": "0:11:41", "throughput": 5596.09, "total_tokens": 14724928} +{"current_steps": 29920, "total_steps": 37885, "loss": 0.0, "lr": 2.574884822804958e-07, "epoch": 3.94879239804672, "percentage": 78.98, "elapsed_time": "0:43:51", "remaining_time": "0:11:40", "throughput": 5596.32, "total_tokens": 14727360} +{"current_steps": 29925, "total_steps": 37885, "loss": 0.0001, "lr": 2.571799701437103e-07, "epoch": 3.949452289824469, "percentage": 78.99, "elapsed_time": "0:43:51", "remaining_time": "0:11:40", "throughput": 5596.56, "total_tokens": 14729856} +{"current_steps": 29930, "total_steps": 37885, "loss": 0.0003, "lr": 2.568716156650974e-07, "epoch": 3.950112181602217, "percentage": 79.0, "elapsed_time": "0:43:52", "remaining_time": "0:11:39", "throughput": 5596.75, "total_tokens": 14732224} +{"current_steps": 29935, "total_steps": 37885, "loss": 0.028, "lr": 2.5656341891010236e-07, "epoch": 3.950772073379966, "percentage": 79.02, "elapsed_time": "0:43:52", "remaining_time": "0:11:39", "throughput": 5597.07, "total_tokens": 14734912} +{"current_steps": 29940, "total_steps": 37885, "loss": 0.0, "lr": 2.5625537994413825e-07, "epoch": 3.951431965157714, "percentage": 79.03, "elapsed_time": "0:43:52", "remaining_time": "0:11:38", "throughput": 5597.25, "total_tokens": 14737216} +{"current_steps": 29945, "total_steps": 37885, "loss": 0.0523, "lr": 2.559474988325838e-07, "epoch": 3.9520918569354624, "percentage": 79.04, "elapsed_time": "0:43:53", "remaining_time": "0:11:38", "throughput": 5597.48, "total_tokens": 14739648} +{"current_steps": 29950, "total_steps": 37885, "loss": 0.1603, "lr": 2.556397756407852e-07, "epoch": 3.952751748713211, "percentage": 79.06, "elapsed_time": "0:43:53", "remaining_time": "0:11:37", "throughput": 5597.81, "total_tokens": 14742400} +{"current_steps": 29955, "total_steps": 37885, "loss": 0.0001, "lr": 2.5533221043405364e-07, "epoch": 3.9534116404909594, "percentage": 79.07, "elapsed_time": "0:43:53", "remaining_time": "0:11:37", "throughput": 5598.03, "total_tokens": 14744832} +{"current_steps": 29960, "total_steps": 37885, "loss": 0.0003, "lr": 2.5502480327766785e-07, "epoch": 3.954071532268708, "percentage": 79.08, "elapsed_time": "0:43:54", "remaining_time": "0:11:36", "throughput": 5598.31, "total_tokens": 14747392} +{"current_steps": 29965, "total_steps": 37885, "loss": 0.02, "lr": 2.5471755423687326e-07, "epoch": 3.9547314240464564, "percentage": 79.09, "elapsed_time": "0:43:54", "remaining_time": "0:11:36", "throughput": 5598.57, "total_tokens": 14749952} +{"current_steps": 29970, "total_steps": 37885, "loss": 0.0, "lr": 2.5441046337688053e-07, "epoch": 3.9553913158242047, "percentage": 79.11, "elapsed_time": "0:43:54", "remaining_time": "0:11:35", "throughput": 5598.8, "total_tokens": 14752384} +{"current_steps": 29975, "total_steps": 37885, "loss": 0.0849, "lr": 2.541035307628678e-07, "epoch": 3.9560512076019534, "percentage": 79.12, "elapsed_time": "0:43:55", "remaining_time": "0:11:35", "throughput": 5599.05, "total_tokens": 14754880} +{"current_steps": 29980, "total_steps": 37885, "loss": 0.0002, "lr": 2.5379675645997965e-07, "epoch": 3.9567110993797017, "percentage": 79.13, "elapsed_time": "0:43:55", "remaining_time": "0:11:34", "throughput": 5599.23, "total_tokens": 14757184} +{"current_steps": 29985, "total_steps": 37885, "loss": 0.0546, "lr": 2.5349014053332604e-07, "epoch": 3.9573709911574504, "percentage": 79.15, "elapsed_time": "0:43:55", "remaining_time": "0:11:34", "throughput": 5599.5, "total_tokens": 14759744} +{"current_steps": 29990, "total_steps": 37885, "loss": 0.0, "lr": 2.5318368304798464e-07, "epoch": 3.9580308829351987, "percentage": 79.16, "elapsed_time": "0:43:56", "remaining_time": "0:11:33", "throughput": 5599.7, "total_tokens": 14762112} +{"current_steps": 29995, "total_steps": 37885, "loss": 0.0, "lr": 2.5287738406899783e-07, "epoch": 3.958690774712947, "percentage": 79.17, "elapsed_time": "0:43:56", "remaining_time": "0:11:33", "throughput": 5599.95, "total_tokens": 14764608} +{"current_steps": 30000, "total_steps": 37885, "loss": 0.0015, "lr": 2.525712436613767e-07, "epoch": 3.9593506664906957, "percentage": 79.19, "elapsed_time": "0:43:56", "remaining_time": "0:11:33", "throughput": 5600.19, "total_tokens": 14767104} +{"current_steps": 30005, "total_steps": 37885, "loss": 0.0012, "lr": 2.5226526189009656e-07, "epoch": 3.960010558268444, "percentage": 79.2, "elapsed_time": "0:43:57", "remaining_time": "0:11:32", "throughput": 5600.51, "total_tokens": 14769792} +{"current_steps": 30010, "total_steps": 37885, "loss": 0.0001, "lr": 2.519594388200994e-07, "epoch": 3.9606704500461927, "percentage": 79.21, "elapsed_time": "0:43:57", "remaining_time": "0:11:32", "throughput": 5600.73, "total_tokens": 14772224} +{"current_steps": 30015, "total_steps": 37885, "loss": 0.0007, "lr": 2.51653774516295e-07, "epoch": 3.961330341823941, "percentage": 79.23, "elapsed_time": "0:43:57", "remaining_time": "0:11:31", "throughput": 5601.01, "total_tokens": 14774784} +{"current_steps": 30020, "total_steps": 37885, "loss": 0.0472, "lr": 2.5134826904355767e-07, "epoch": 3.9619902336016892, "percentage": 79.24, "elapsed_time": "0:43:58", "remaining_time": "0:11:31", "throughput": 5601.19, "total_tokens": 14777088} +{"current_steps": 30025, "total_steps": 37885, "loss": 0.0, "lr": 2.510429224667291e-07, "epoch": 3.9626501253794375, "percentage": 79.25, "elapsed_time": "0:43:58", "remaining_time": "0:11:30", "throughput": 5601.32, "total_tokens": 14779264} +{"current_steps": 30030, "total_steps": 37885, "loss": 0.0, "lr": 2.5073773485061645e-07, "epoch": 3.9633100171571862, "percentage": 79.27, "elapsed_time": "0:43:58", "remaining_time": "0:11:30", "throughput": 5601.55, "total_tokens": 14781696} +{"current_steps": 30035, "total_steps": 37885, "loss": 0.0704, "lr": 2.504327062599939e-07, "epoch": 3.9639699089349345, "percentage": 79.28, "elapsed_time": "0:43:59", "remaining_time": "0:11:29", "throughput": 5601.87, "total_tokens": 14784384} +{"current_steps": 30040, "total_steps": 37885, "loss": 0.0657, "lr": 2.501278367596017e-07, "epoch": 3.9646298007126832, "percentage": 79.29, "elapsed_time": "0:43:59", "remaining_time": "0:11:29", "throughput": 5602.07, "total_tokens": 14786752} +{"current_steps": 30045, "total_steps": 37885, "loss": 0.0, "lr": 2.498231264141458e-07, "epoch": 3.9652896924904315, "percentage": 79.31, "elapsed_time": "0:43:59", "remaining_time": "0:11:28", "throughput": 5602.34, "total_tokens": 14789312} +{"current_steps": 30050, "total_steps": 37885, "loss": 0.0004, "lr": 2.495185752882989e-07, "epoch": 3.96594958426818, "percentage": 79.32, "elapsed_time": "0:44:00", "remaining_time": "0:11:28", "throughput": 5602.47, "total_tokens": 14791488} +{"current_steps": 30055, "total_steps": 37885, "loss": 0.0, "lr": 2.492141834467002e-07, "epoch": 3.9666094760459285, "percentage": 79.33, "elapsed_time": "0:44:00", "remaining_time": "0:11:27", "throughput": 5602.67, "total_tokens": 14793856} +{"current_steps": 30060, "total_steps": 37885, "loss": 0.0518, "lr": 2.4890995095395397e-07, "epoch": 3.967269367823677, "percentage": 79.35, "elapsed_time": "0:44:00", "remaining_time": "0:11:27", "throughput": 5602.92, "total_tokens": 14796352} +{"current_steps": 30065, "total_steps": 37885, "loss": 0.0, "lr": 2.486058778746316e-07, "epoch": 3.9679292596014255, "percentage": 79.36, "elapsed_time": "0:44:01", "remaining_time": "0:11:26", "throughput": 5603.21, "total_tokens": 14798976} +{"current_steps": 30070, "total_steps": 37885, "loss": 0.0518, "lr": 2.4830196427327056e-07, "epoch": 3.968589151379174, "percentage": 79.37, "elapsed_time": "0:44:01", "remaining_time": "0:11:26", "throughput": 5603.46, "total_tokens": 14801472} +{"current_steps": 30075, "total_steps": 37885, "loss": 0.0003, "lr": 2.4799821021437463e-07, "epoch": 3.969249043156922, "percentage": 79.38, "elapsed_time": "0:44:01", "remaining_time": "0:11:26", "throughput": 5603.79, "total_tokens": 14804224} +{"current_steps": 30080, "total_steps": 37885, "loss": 0.0, "lr": 2.476946157624126e-07, "epoch": 3.969908934934671, "percentage": 79.4, "elapsed_time": "0:44:02", "remaining_time": "0:11:25", "throughput": 5603.95, "total_tokens": 14806464} +{"current_steps": 30085, "total_steps": 37885, "loss": 0.0001, "lr": 2.4739118098182055e-07, "epoch": 3.970568826712419, "percentage": 79.41, "elapsed_time": "0:44:02", "remaining_time": "0:11:25", "throughput": 5604.29, "total_tokens": 14809216} +{"current_steps": 30090, "total_steps": 37885, "loss": 0.0, "lr": 2.470879059370008e-07, "epoch": 3.971228718490168, "percentage": 79.42, "elapsed_time": "0:44:02", "remaining_time": "0:11:24", "throughput": 5604.41, "total_tokens": 14811392} +{"current_steps": 30095, "total_steps": 37885, "loss": 0.0, "lr": 2.467847906923205e-07, "epoch": 3.971888610267916, "percentage": 79.44, "elapsed_time": "0:44:03", "remaining_time": "0:11:24", "throughput": 5604.64, "total_tokens": 14813824} +{"current_steps": 30100, "total_steps": 37885, "loss": 0.0001, "lr": 2.4648183531211397e-07, "epoch": 3.9725485020456643, "percentage": 79.45, "elapsed_time": "0:44:03", "remaining_time": "0:11:23", "throughput": 5604.78, "total_tokens": 14816000} +{"current_steps": 30105, "total_steps": 37885, "loss": 0.0005, "lr": 2.4617903986068146e-07, "epoch": 3.973208393823413, "percentage": 79.46, "elapsed_time": "0:44:03", "remaining_time": "0:11:23", "throughput": 5604.98, "total_tokens": 14818368} +{"current_steps": 30110, "total_steps": 37885, "loss": 0.0939, "lr": 2.458764044022892e-07, "epoch": 3.9738682856011613, "percentage": 79.48, "elapsed_time": "0:44:04", "remaining_time": "0:11:22", "throughput": 5605.12, "total_tokens": 14820544} +{"current_steps": 30115, "total_steps": 37885, "loss": 0.0003, "lr": 2.455739290011689e-07, "epoch": 3.97452817737891, "percentage": 79.49, "elapsed_time": "0:44:04", "remaining_time": "0:11:22", "throughput": 5605.32, "total_tokens": 14822912} +{"current_steps": 30120, "total_steps": 37885, "loss": 0.0626, "lr": 2.452716137215191e-07, "epoch": 3.9751880691566583, "percentage": 79.5, "elapsed_time": "0:44:04", "remaining_time": "0:11:21", "throughput": 5605.48, "total_tokens": 14825152} +{"current_steps": 30125, "total_steps": 37885, "loss": 0.0, "lr": 2.449694586275042e-07, "epoch": 3.9758479609344066, "percentage": 79.52, "elapsed_time": "0:44:05", "remaining_time": "0:11:21", "throughput": 5605.77, "total_tokens": 14827776} +{"current_steps": 30130, "total_steps": 37885, "loss": 0.0001, "lr": 2.4466746378325384e-07, "epoch": 3.9765078527121553, "percentage": 79.53, "elapsed_time": "0:44:05", "remaining_time": "0:11:20", "throughput": 5606.04, "total_tokens": 14830336} +{"current_steps": 30135, "total_steps": 37885, "loss": 0.0005, "lr": 2.4436562925286473e-07, "epoch": 3.9771677444899036, "percentage": 79.54, "elapsed_time": "0:44:05", "remaining_time": "0:11:20", "throughput": 5606.31, "total_tokens": 14832896} +{"current_steps": 30140, "total_steps": 37885, "loss": 0.0213, "lr": 2.440639551003992e-07, "epoch": 3.9778276362676523, "percentage": 79.56, "elapsed_time": "0:44:06", "remaining_time": "0:11:19", "throughput": 5606.46, "total_tokens": 14835136} +{"current_steps": 30145, "total_steps": 37885, "loss": 0.0001, "lr": 2.437624413898849e-07, "epoch": 3.9784875280454006, "percentage": 79.57, "elapsed_time": "0:44:06", "remaining_time": "0:11:19", "throughput": 5606.64, "total_tokens": 14837440} +{"current_steps": 30150, "total_steps": 37885, "loss": 0.0549, "lr": 2.4346108818531605e-07, "epoch": 3.979147419823149, "percentage": 79.58, "elapsed_time": "0:44:06", "remaining_time": "0:11:19", "throughput": 5606.95, "total_tokens": 14840128} +{"current_steps": 30155, "total_steps": 37885, "loss": 0.0001, "lr": 2.4315989555065284e-07, "epoch": 3.979807311600897, "percentage": 79.6, "elapsed_time": "0:44:07", "remaining_time": "0:11:18", "throughput": 5607.17, "total_tokens": 14842560} +{"current_steps": 30160, "total_steps": 37885, "loss": 0.0, "lr": 2.428588635498215e-07, "epoch": 3.980467203378646, "percentage": 79.61, "elapsed_time": "0:44:07", "remaining_time": "0:11:18", "throughput": 5607.37, "total_tokens": 14844928} +{"current_steps": 30165, "total_steps": 37885, "loss": 0.0, "lr": 2.425579922467137e-07, "epoch": 3.9811270951563946, "percentage": 79.62, "elapsed_time": "0:44:07", "remaining_time": "0:11:17", "throughput": 5607.5, "total_tokens": 14847104} +{"current_steps": 30170, "total_steps": 37885, "loss": 0.0, "lr": 2.4225728170518636e-07, "epoch": 3.981786986934143, "percentage": 79.64, "elapsed_time": "0:44:08", "remaining_time": "0:11:17", "throughput": 5607.76, "total_tokens": 14849664} +{"current_steps": 30175, "total_steps": 37885, "loss": 0.0518, "lr": 2.419567319890645e-07, "epoch": 3.982446878711891, "percentage": 79.65, "elapsed_time": "0:44:08", "remaining_time": "0:11:16", "throughput": 5607.94, "total_tokens": 14851968} +{"current_steps": 30180, "total_steps": 37885, "loss": 0.0, "lr": 2.416563431621366e-07, "epoch": 3.9831067704896395, "percentage": 79.66, "elapsed_time": "0:44:08", "remaining_time": "0:11:16", "throughput": 5608.16, "total_tokens": 14854400} +{"current_steps": 30185, "total_steps": 37885, "loss": 0.0024, "lr": 2.413561152881587e-07, "epoch": 3.983766662267388, "percentage": 79.68, "elapsed_time": "0:44:09", "remaining_time": "0:11:15", "throughput": 5608.45, "total_tokens": 14857024} +{"current_steps": 30190, "total_steps": 37885, "loss": 0.0, "lr": 2.410560484308514e-07, "epoch": 3.9844265540451365, "percentage": 79.69, "elapsed_time": "0:44:09", "remaining_time": "0:11:15", "throughput": 5608.6, "total_tokens": 14859264} +{"current_steps": 30195, "total_steps": 37885, "loss": 0.0, "lr": 2.407561426539019e-07, "epoch": 3.985086445822885, "percentage": 79.7, "elapsed_time": "0:44:09", "remaining_time": "0:11:14", "throughput": 5608.87, "total_tokens": 14861824} +{"current_steps": 30200, "total_steps": 37885, "loss": 0.0037, "lr": 2.404563980209634e-07, "epoch": 3.9857463376006335, "percentage": 79.71, "elapsed_time": "0:44:10", "remaining_time": "0:11:14", "throughput": 5609.08, "total_tokens": 14864256} +{"current_steps": 30205, "total_steps": 37885, "loss": 0.0001, "lr": 2.401568145956537e-07, "epoch": 3.9864062293783817, "percentage": 79.73, "elapsed_time": "0:44:10", "remaining_time": "0:11:13", "throughput": 5609.21, "total_tokens": 14866432} +{"current_steps": 30210, "total_steps": 37885, "loss": 0.0001, "lr": 2.398573924415583e-07, "epoch": 3.9870661211561305, "percentage": 79.74, "elapsed_time": "0:44:10", "remaining_time": "0:11:13", "throughput": 5609.56, "total_tokens": 14869248} +{"current_steps": 30215, "total_steps": 37885, "loss": 0.0, "lr": 2.395581316222269e-07, "epoch": 3.9877260129338787, "percentage": 79.75, "elapsed_time": "0:44:11", "remaining_time": "0:11:12", "throughput": 5609.74, "total_tokens": 14871552} +{"current_steps": 30220, "total_steps": 37885, "loss": 0.0503, "lr": 2.3925903220117506e-07, "epoch": 3.9883859047116275, "percentage": 79.77, "elapsed_time": "0:44:11", "remaining_time": "0:11:12", "throughput": 5610.12, "total_tokens": 14874432} +{"current_steps": 30225, "total_steps": 37885, "loss": 0.0253, "lr": 2.389600942418848e-07, "epoch": 3.9890457964893757, "percentage": 79.78, "elapsed_time": "0:44:11", "remaining_time": "0:11:12", "throughput": 5610.31, "total_tokens": 14876800} +{"current_steps": 30230, "total_steps": 37885, "loss": 0.0, "lr": 2.386613178078035e-07, "epoch": 3.989705688267124, "percentage": 79.79, "elapsed_time": "0:44:12", "remaining_time": "0:11:11", "throughput": 5610.51, "total_tokens": 14879168} +{"current_steps": 30235, "total_steps": 37885, "loss": 0.0, "lr": 2.3836270296234463e-07, "epoch": 3.9903655800448727, "percentage": 79.81, "elapsed_time": "0:44:12", "remaining_time": "0:11:11", "throughput": 5610.77, "total_tokens": 14881728} +{"current_steps": 30240, "total_steps": 37885, "loss": 0.0001, "lr": 2.3806424976888639e-07, "epoch": 3.991025471822621, "percentage": 79.82, "elapsed_time": "0:44:12", "remaining_time": "0:11:10", "throughput": 5611.03, "total_tokens": 14884288} +{"current_steps": 30245, "total_steps": 37885, "loss": 0.0472, "lr": 2.3776595829077362e-07, "epoch": 3.9916853636003697, "percentage": 79.83, "elapsed_time": "0:44:13", "remaining_time": "0:11:10", "throughput": 5611.34, "total_tokens": 14886976} +{"current_steps": 30250, "total_steps": 37885, "loss": 0.0006, "lr": 2.3746782859131685e-07, "epoch": 3.992345255378118, "percentage": 79.85, "elapsed_time": "0:44:13", "remaining_time": "0:11:09", "throughput": 5611.62, "total_tokens": 14889600} +{"current_steps": 30255, "total_steps": 37885, "loss": 0.0565, "lr": 2.371698607337913e-07, "epoch": 3.9930051471558663, "percentage": 79.86, "elapsed_time": "0:44:13", "remaining_time": "0:11:09", "throughput": 5611.91, "total_tokens": 14892224} +{"current_steps": 30260, "total_steps": 37885, "loss": 0.0001, "lr": 2.368720547814389e-07, "epoch": 3.993665038933615, "percentage": 79.87, "elapsed_time": "0:44:14", "remaining_time": "0:11:08", "throughput": 5612.1, "total_tokens": 14894592} +{"current_steps": 30265, "total_steps": 37885, "loss": 0.0, "lr": 2.3657441079746698e-07, "epoch": 3.9943249307113633, "percentage": 79.89, "elapsed_time": "0:44:14", "remaining_time": "0:11:08", "throughput": 5612.43, "total_tokens": 14897344} +{"current_steps": 30270, "total_steps": 37885, "loss": 0.0411, "lr": 2.362769288450478e-07, "epoch": 3.994984822489112, "percentage": 79.9, "elapsed_time": "0:44:14", "remaining_time": "0:11:07", "throughput": 5612.69, "total_tokens": 14899904} +{"current_steps": 30275, "total_steps": 37885, "loss": 0.0, "lr": 2.3597960898731995e-07, "epoch": 3.9956447142668603, "percentage": 79.91, "elapsed_time": "0:44:15", "remaining_time": "0:11:07", "throughput": 5612.94, "total_tokens": 14902400} +{"current_steps": 30280, "total_steps": 37885, "loss": 0.0001, "lr": 2.356824512873876e-07, "epoch": 3.9963046060446086, "percentage": 79.93, "elapsed_time": "0:44:15", "remaining_time": "0:11:06", "throughput": 5613.11, "total_tokens": 14904704} +{"current_steps": 30285, "total_steps": 37885, "loss": 0.0009, "lr": 2.3538545580832047e-07, "epoch": 3.9969644978223573, "percentage": 79.94, "elapsed_time": "0:44:15", "remaining_time": "0:11:06", "throughput": 5613.46, "total_tokens": 14907520} +{"current_steps": 30290, "total_steps": 37885, "loss": 0.0414, "lr": 2.350886226131531e-07, "epoch": 3.9976243896001056, "percentage": 79.95, "elapsed_time": "0:44:16", "remaining_time": "0:11:05", "throughput": 5613.69, "total_tokens": 14909952} +{"current_steps": 30295, "total_steps": 37885, "loss": 0.0, "lr": 2.3479195176488664e-07, "epoch": 3.9982842813778543, "percentage": 79.97, "elapsed_time": "0:44:16", "remaining_time": "0:11:05", "throughput": 5613.99, "total_tokens": 14912640} +{"current_steps": 30300, "total_steps": 37885, "loss": 0.0626, "lr": 2.344954433264874e-07, "epoch": 3.9989441731556026, "percentage": 79.98, "elapsed_time": "0:44:16", "remaining_time": "0:11:05", "throughput": 5614.23, "total_tokens": 14915136} +{"current_steps": 30305, "total_steps": 37885, "loss": 0.0, "lr": 2.3419909736088672e-07, "epoch": 3.999604064933351, "percentage": 79.99, "elapsed_time": "0:44:16", "remaining_time": "0:11:04", "throughput": 5614.43, "total_tokens": 14917504} +{"current_steps": 30310, "total_steps": 37885, "loss": 0.0, "lr": 2.3390291393098215e-07, "epoch": 4.000263956711099, "percentage": 80.01, "elapsed_time": "0:44:17", "remaining_time": "0:11:04", "throughput": 5614.45, "total_tokens": 14919888} +{"current_steps": 30315, "total_steps": 37885, "loss": 0.0003, "lr": 2.3360689309963666e-07, "epoch": 4.000923848488847, "percentage": 80.02, "elapsed_time": "0:44:17", "remaining_time": "0:11:03", "throughput": 5614.77, "total_tokens": 14922640} +{"current_steps": 30320, "total_steps": 37885, "loss": 0.0002, "lr": 2.333110349296782e-07, "epoch": 4.001583740266597, "percentage": 80.03, "elapsed_time": "0:44:18", "remaining_time": "0:11:03", "throughput": 5614.95, "total_tokens": 14924944} +{"current_steps": 30320, "total_steps": 37885, "eval_loss": 0.1816491037607193, "epoch": 4.001583740266597, "percentage": 80.03, "elapsed_time": "0:44:25", "remaining_time": "0:11:05", "throughput": 5598.42, "total_tokens": 14924944} +{"current_steps": 30325, "total_steps": 37885, "loss": 0.0, "lr": 2.3301533948390072e-07, "epoch": 4.002243632044345, "percentage": 80.04, "elapsed_time": "0:45:03", "remaining_time": "0:11:13", "throughput": 5522.25, "total_tokens": 14927632} +{"current_steps": 30330, "total_steps": 37885, "loss": 0.0001, "lr": 2.3271980682506297e-07, "epoch": 4.002903523822093, "percentage": 80.06, "elapsed_time": "0:45:03", "remaining_time": "0:11:13", "throughput": 5522.49, "total_tokens": 14930128} +{"current_steps": 30335, "total_steps": 37885, "loss": 0.0, "lr": 2.3242443701589054e-07, "epoch": 4.003563415599841, "percentage": 80.07, "elapsed_time": "0:45:03", "remaining_time": "0:11:12", "throughput": 5522.76, "total_tokens": 14932688} +{"current_steps": 30340, "total_steps": 37885, "loss": 0.0002, "lr": 2.3212923011907305e-07, "epoch": 4.00422330737759, "percentage": 80.08, "elapsed_time": "0:45:04", "remaining_time": "0:11:12", "throughput": 5522.98, "total_tokens": 14935120} +{"current_steps": 30345, "total_steps": 37885, "loss": 0.0, "lr": 2.3183418619726523e-07, "epoch": 4.004883199155339, "percentage": 80.1, "elapsed_time": "0:45:04", "remaining_time": "0:11:12", "throughput": 5523.17, "total_tokens": 14937488} +{"current_steps": 30350, "total_steps": 37885, "loss": 0.0176, "lr": 2.3153930531308952e-07, "epoch": 4.005543090933087, "percentage": 80.11, "elapsed_time": "0:45:04", "remaining_time": "0:11:11", "throughput": 5523.42, "total_tokens": 14939984} +{"current_steps": 30355, "total_steps": 37885, "loss": 0.0, "lr": 2.3124458752913123e-07, "epoch": 4.006202982710835, "percentage": 80.12, "elapsed_time": "0:45:05", "remaining_time": "0:11:11", "throughput": 5523.64, "total_tokens": 14942416} +{"current_steps": 30360, "total_steps": 37885, "loss": 0.0006, "lr": 2.3095003290794258e-07, "epoch": 4.006862874488584, "percentage": 80.14, "elapsed_time": "0:45:05", "remaining_time": "0:11:10", "throughput": 5523.8, "total_tokens": 14944656} +{"current_steps": 30365, "total_steps": 37885, "loss": 0.0018, "lr": 2.306556415120401e-07, "epoch": 4.007522766266332, "percentage": 80.15, "elapsed_time": "0:45:05", "remaining_time": "0:11:10", "throughput": 5524.1, "total_tokens": 14947344} +{"current_steps": 30370, "total_steps": 37885, "loss": 0.0014, "lr": 2.3036141340390657e-07, "epoch": 4.008182658044081, "percentage": 80.16, "elapsed_time": "0:45:06", "remaining_time": "0:11:09", "throughput": 5524.28, "total_tokens": 14949648} +{"current_steps": 30375, "total_steps": 37885, "loss": 0.0001, "lr": 2.3006734864599008e-07, "epoch": 4.008842549821829, "percentage": 80.18, "elapsed_time": "0:45:06", "remaining_time": "0:11:09", "throughput": 5524.63, "total_tokens": 14952464} +{"current_steps": 30380, "total_steps": 37885, "loss": 0.0004, "lr": 2.2977344730070314e-07, "epoch": 4.009502441599578, "percentage": 80.19, "elapsed_time": "0:45:06", "remaining_time": "0:11:08", "throughput": 5524.86, "total_tokens": 14954896} +{"current_steps": 30385, "total_steps": 37885, "loss": 0.0504, "lr": 2.294797094304244e-07, "epoch": 4.010162333377326, "percentage": 80.2, "elapsed_time": "0:45:07", "remaining_time": "0:11:08", "throughput": 5525.13, "total_tokens": 14957456} +{"current_steps": 30390, "total_steps": 37885, "loss": 0.0, "lr": 2.2918613509749795e-07, "epoch": 4.010822225155074, "percentage": 80.22, "elapsed_time": "0:45:07", "remaining_time": "0:11:07", "throughput": 5525.28, "total_tokens": 14959696} +{"current_steps": 30395, "total_steps": 37885, "loss": 0.0001, "lr": 2.2889272436423233e-07, "epoch": 4.011482116932823, "percentage": 80.23, "elapsed_time": "0:45:07", "remaining_time": "0:11:07", "throughput": 5525.53, "total_tokens": 14962192} +{"current_steps": 30400, "total_steps": 37885, "loss": 0.0, "lr": 2.2859947729290207e-07, "epoch": 4.012142008710572, "percentage": 80.24, "elapsed_time": "0:45:08", "remaining_time": "0:11:06", "throughput": 5525.69, "total_tokens": 14964432} +{"current_steps": 30405, "total_steps": 37885, "loss": 0.0, "lr": 2.2830639394574657e-07, "epoch": 4.01280190048832, "percentage": 80.26, "elapsed_time": "0:45:08", "remaining_time": "0:11:06", "throughput": 5525.98, "total_tokens": 14967056} +{"current_steps": 30410, "total_steps": 37885, "loss": 0.0, "lr": 2.280134743849712e-07, "epoch": 4.013461792266068, "percentage": 80.27, "elapsed_time": "0:45:08", "remaining_time": "0:11:05", "throughput": 5526.14, "total_tokens": 14969296} +{"current_steps": 30415, "total_steps": 37885, "loss": 0.056, "lr": 2.2772071867274524e-07, "epoch": 4.0141216840438165, "percentage": 80.28, "elapsed_time": "0:45:09", "remaining_time": "0:11:05", "throughput": 5526.32, "total_tokens": 14971600} +{"current_steps": 30420, "total_steps": 37885, "loss": 0.0006, "lr": 2.2742812687120438e-07, "epoch": 4.014781575821566, "percentage": 80.3, "elapsed_time": "0:45:09", "remaining_time": "0:11:04", "throughput": 5526.49, "total_tokens": 14973904} +{"current_steps": 30425, "total_steps": 37885, "loss": 0.001, "lr": 2.2713569904244934e-07, "epoch": 4.015441467599314, "percentage": 80.31, "elapsed_time": "0:45:09", "remaining_time": "0:11:04", "throughput": 5526.81, "total_tokens": 14976592} +{"current_steps": 30430, "total_steps": 37885, "loss": 0.0005, "lr": 2.268434352485452e-07, "epoch": 4.016101359377062, "percentage": 80.32, "elapsed_time": "0:45:10", "remaining_time": "0:11:03", "throughput": 5527.16, "total_tokens": 14979408} +{"current_steps": 30435, "total_steps": 37885, "loss": 0.0003, "lr": 2.265513355515233e-07, "epoch": 4.0167612511548105, "percentage": 80.34, "elapsed_time": "0:45:10", "remaining_time": "0:11:03", "throughput": 5527.36, "total_tokens": 14981776} +{"current_steps": 30440, "total_steps": 37885, "loss": 0.0, "lr": 2.262594000133795e-07, "epoch": 4.017421142932559, "percentage": 80.35, "elapsed_time": "0:45:10", "remaining_time": "0:11:03", "throughput": 5527.59, "total_tokens": 14984208} +{"current_steps": 30445, "total_steps": 37885, "loss": 0.0007, "lr": 2.2596762869607521e-07, "epoch": 4.018081034710307, "percentage": 80.36, "elapsed_time": "0:45:11", "remaining_time": "0:11:02", "throughput": 5527.83, "total_tokens": 14986704} +{"current_steps": 30450, "total_steps": 37885, "loss": 0.0, "lr": 2.2567602166153653e-07, "epoch": 4.018740926488056, "percentage": 80.37, "elapsed_time": "0:45:11", "remaining_time": "0:11:02", "throughput": 5528.11, "total_tokens": 14989328} +{"current_steps": 30455, "total_steps": 37885, "loss": 0.0001, "lr": 2.2538457897165498e-07, "epoch": 4.0194008182658045, "percentage": 80.39, "elapsed_time": "0:45:11", "remaining_time": "0:11:01", "throughput": 5528.26, "total_tokens": 14991568} +{"current_steps": 30460, "total_steps": 37885, "loss": 0.028, "lr": 2.2509330068828748e-07, "epoch": 4.020060710043553, "percentage": 80.4, "elapsed_time": "0:45:12", "remaining_time": "0:11:01", "throughput": 5528.37, "total_tokens": 14993680} +{"current_steps": 30465, "total_steps": 37885, "loss": 0.0, "lr": 2.2480218687325515e-07, "epoch": 4.020720601821301, "percentage": 80.41, "elapsed_time": "0:45:12", "remaining_time": "0:11:00", "throughput": 5528.57, "total_tokens": 14996048} +{"current_steps": 30470, "total_steps": 37885, "loss": 0.0, "lr": 2.2451123758834512e-07, "epoch": 4.021380493599049, "percentage": 80.43, "elapsed_time": "0:45:12", "remaining_time": "0:11:00", "throughput": 5528.81, "total_tokens": 14998544} +{"current_steps": 30475, "total_steps": 37885, "loss": 0.0337, "lr": 2.2422045289530967e-07, "epoch": 4.0220403853767985, "percentage": 80.44, "elapsed_time": "0:45:13", "remaining_time": "0:10:59", "throughput": 5529.04, "total_tokens": 15000976} +{"current_steps": 30480, "total_steps": 37885, "loss": 0.0, "lr": 2.2392983285586487e-07, "epoch": 4.022700277154547, "percentage": 80.45, "elapsed_time": "0:45:13", "remaining_time": "0:10:59", "throughput": 5529.25, "total_tokens": 15003408} +{"current_steps": 30485, "total_steps": 37885, "loss": 0.0383, "lr": 2.2363937753169338e-07, "epoch": 4.023360168932295, "percentage": 80.47, "elapsed_time": "0:45:13", "remaining_time": "0:10:58", "throughput": 5529.49, "total_tokens": 15005904} +{"current_steps": 30490, "total_steps": 37885, "loss": 0.0239, "lr": 2.2334908698444188e-07, "epoch": 4.024020060710043, "percentage": 80.48, "elapsed_time": "0:45:14", "remaining_time": "0:10:58", "throughput": 5529.73, "total_tokens": 15008400} +{"current_steps": 30495, "total_steps": 37885, "loss": 0.0, "lr": 2.23058961275723e-07, "epoch": 4.024679952487792, "percentage": 80.49, "elapsed_time": "0:45:14", "remaining_time": "0:10:57", "throughput": 5529.99, "total_tokens": 15010960} +{"current_steps": 30500, "total_steps": 37885, "loss": 0.0, "lr": 2.2276900046711334e-07, "epoch": 4.025339844265541, "percentage": 80.51, "elapsed_time": "0:45:14", "remaining_time": "0:10:57", "throughput": 5530.21, "total_tokens": 15013392} +{"current_steps": 30505, "total_steps": 37885, "loss": 0.0005, "lr": 2.2247920462015458e-07, "epoch": 4.025999736043289, "percentage": 80.52, "elapsed_time": "0:45:15", "remaining_time": "0:10:56", "throughput": 5530.52, "total_tokens": 15016080} +{"current_steps": 30510, "total_steps": 37885, "loss": 0.0, "lr": 2.2218957379635483e-07, "epoch": 4.026659627821037, "percentage": 80.53, "elapsed_time": "0:45:15", "remaining_time": "0:10:56", "throughput": 5530.67, "total_tokens": 15018320} +{"current_steps": 30515, "total_steps": 37885, "loss": 0.0, "lr": 2.2190010805718528e-07, "epoch": 4.027319519598786, "percentage": 80.55, "elapsed_time": "0:45:15", "remaining_time": "0:10:55", "throughput": 5530.98, "total_tokens": 15021008} +{"current_steps": 30520, "total_steps": 37885, "loss": 0.0, "lr": 2.2161080746408345e-07, "epoch": 4.027979411376534, "percentage": 80.56, "elapsed_time": "0:45:16", "remaining_time": "0:10:55", "throughput": 5531.16, "total_tokens": 15023312} +{"current_steps": 30525, "total_steps": 37885, "loss": 0.0023, "lr": 2.2132167207845087e-07, "epoch": 4.028639303154283, "percentage": 80.57, "elapsed_time": "0:45:16", "remaining_time": "0:10:54", "throughput": 5531.31, "total_tokens": 15025552} +{"current_steps": 30530, "total_steps": 37885, "loss": 0.0, "lr": 2.2103270196165468e-07, "epoch": 4.029299194932031, "percentage": 80.59, "elapsed_time": "0:45:16", "remaining_time": "0:10:54", "throughput": 5531.59, "total_tokens": 15028176} +{"current_steps": 30535, "total_steps": 37885, "loss": 0.0, "lr": 2.2074389717502695e-07, "epoch": 4.02995908670978, "percentage": 80.6, "elapsed_time": "0:45:17", "remaining_time": "0:10:54", "throughput": 5531.76, "total_tokens": 15030416} +{"current_steps": 30540, "total_steps": 37885, "loss": 0.0008, "lr": 2.204552577798635e-07, "epoch": 4.030618978487528, "percentage": 80.61, "elapsed_time": "0:45:17", "remaining_time": "0:10:53", "throughput": 5531.94, "total_tokens": 15032720} +{"current_steps": 30545, "total_steps": 37885, "loss": 0.0004, "lr": 2.2016678383742714e-07, "epoch": 4.031278870265276, "percentage": 80.63, "elapsed_time": "0:45:17", "remaining_time": "0:10:53", "throughput": 5532.19, "total_tokens": 15035216} +{"current_steps": 30550, "total_steps": 37885, "loss": 0.0, "lr": 2.1987847540894378e-07, "epoch": 4.031938762043025, "percentage": 80.64, "elapsed_time": "0:45:18", "remaining_time": "0:10:52", "throughput": 5532.42, "total_tokens": 15037648} +{"current_steps": 30555, "total_steps": 37885, "loss": 0.0001, "lr": 2.1959033255560455e-07, "epoch": 4.032598653820774, "percentage": 80.65, "elapsed_time": "0:45:18", "remaining_time": "0:10:52", "throughput": 5532.62, "total_tokens": 15040016} +{"current_steps": 30560, "total_steps": 37885, "loss": 0.0009, "lr": 2.19302355338566e-07, "epoch": 4.033258545598522, "percentage": 80.67, "elapsed_time": "0:45:18", "remaining_time": "0:10:51", "throughput": 5532.94, "total_tokens": 15042768} +{"current_steps": 30565, "total_steps": 37885, "loss": 0.028, "lr": 2.1901454381894914e-07, "epoch": 4.03391843737627, "percentage": 80.68, "elapsed_time": "0:45:19", "remaining_time": "0:10:51", "throughput": 5533.11, "total_tokens": 15045008} +{"current_steps": 30570, "total_steps": 37885, "loss": 0.0, "lr": 2.1872689805784007e-07, "epoch": 4.0345783291540185, "percentage": 80.69, "elapsed_time": "0:45:19", "remaining_time": "0:10:50", "throughput": 5533.31, "total_tokens": 15047376} +{"current_steps": 30575, "total_steps": 37885, "loss": 0.0008, "lr": 2.1843941811628918e-07, "epoch": 4.035238220931767, "percentage": 80.7, "elapsed_time": "0:45:19", "remaining_time": "0:10:50", "throughput": 5533.62, "total_tokens": 15050064} +{"current_steps": 30580, "total_steps": 37885, "loss": 0.0, "lr": 2.1815210405531214e-07, "epoch": 4.035898112709516, "percentage": 80.72, "elapsed_time": "0:45:20", "remaining_time": "0:10:49", "throughput": 5533.86, "total_tokens": 15052560} +{"current_steps": 30585, "total_steps": 37885, "loss": 0.0, "lr": 2.1786495593588972e-07, "epoch": 4.036558004487264, "percentage": 80.73, "elapsed_time": "0:45:20", "remaining_time": "0:10:49", "throughput": 5534.1, "total_tokens": 15055056} +{"current_steps": 30590, "total_steps": 37885, "loss": 0.0, "lr": 2.1757797381896625e-07, "epoch": 4.0372178962650125, "percentage": 80.74, "elapsed_time": "0:45:20", "remaining_time": "0:10:48", "throughput": 5534.2, "total_tokens": 15057168} +{"current_steps": 30595, "total_steps": 37885, "loss": 0.0, "lr": 2.1729115776545192e-07, "epoch": 4.037877788042761, "percentage": 80.76, "elapsed_time": "0:45:21", "remaining_time": "0:10:48", "throughput": 5534.35, "total_tokens": 15059408} +{"current_steps": 30600, "total_steps": 37885, "loss": 0.0, "lr": 2.170045078362218e-07, "epoch": 4.038537679820509, "percentage": 80.77, "elapsed_time": "0:45:21", "remaining_time": "0:10:47", "throughput": 5534.63, "total_tokens": 15062032} +{"current_steps": 30605, "total_steps": 37885, "loss": 0.001, "lr": 2.167180240921145e-07, "epoch": 4.039197571598258, "percentage": 80.78, "elapsed_time": "0:45:21", "remaining_time": "0:10:47", "throughput": 5534.87, "total_tokens": 15064528} +{"current_steps": 30610, "total_steps": 37885, "loss": 0.0, "lr": 2.1643170659393461e-07, "epoch": 4.0398574633760065, "percentage": 80.8, "elapsed_time": "0:45:22", "remaining_time": "0:10:46", "throughput": 5535.01, "total_tokens": 15066704} +{"current_steps": 30615, "total_steps": 37885, "loss": 0.0014, "lr": 2.1614555540245083e-07, "epoch": 4.040517355153755, "percentage": 80.81, "elapsed_time": "0:45:22", "remaining_time": "0:10:46", "throughput": 5535.24, "total_tokens": 15069200} +{"current_steps": 30620, "total_steps": 37885, "loss": 0.0, "lr": 2.1585957057839688e-07, "epoch": 4.041177246931503, "percentage": 80.82, "elapsed_time": "0:45:22", "remaining_time": "0:10:46", "throughput": 5535.4, "total_tokens": 15071440} +{"current_steps": 30625, "total_steps": 37885, "loss": 0.0001, "lr": 2.1557375218247053e-07, "epoch": 4.041837138709251, "percentage": 80.84, "elapsed_time": "0:45:23", "remaining_time": "0:10:45", "throughput": 5535.72, "total_tokens": 15074192} +{"current_steps": 30630, "total_steps": 37885, "loss": 0.0, "lr": 2.1528810027533495e-07, "epoch": 4.0424970304870005, "percentage": 80.85, "elapsed_time": "0:45:23", "remaining_time": "0:10:45", "throughput": 5535.95, "total_tokens": 15076624} +{"current_steps": 30635, "total_steps": 37885, "loss": 0.0, "lr": 2.1500261491761796e-07, "epoch": 4.043156922264749, "percentage": 80.86, "elapsed_time": "0:45:23", "remaining_time": "0:10:44", "throughput": 5536.22, "total_tokens": 15079248} +{"current_steps": 30640, "total_steps": 37885, "loss": 0.0005, "lr": 2.1471729616991107e-07, "epoch": 4.043816814042497, "percentage": 80.88, "elapsed_time": "0:45:24", "remaining_time": "0:10:44", "throughput": 5536.38, "total_tokens": 15081488} +{"current_steps": 30645, "total_steps": 37885, "loss": 0.0, "lr": 2.1443214409277154e-07, "epoch": 4.044476705820245, "percentage": 80.89, "elapsed_time": "0:45:24", "remaining_time": "0:10:43", "throughput": 5536.58, "total_tokens": 15083856} +{"current_steps": 30650, "total_steps": 37885, "loss": 0.0004, "lr": 2.1414715874672117e-07, "epoch": 4.045136597597994, "percentage": 80.9, "elapsed_time": "0:45:24", "remaining_time": "0:10:43", "throughput": 5536.74, "total_tokens": 15086160} +{"current_steps": 30655, "total_steps": 37885, "loss": 0.0, "lr": 2.1386234019224525e-07, "epoch": 4.045796489375743, "percentage": 80.92, "elapsed_time": "0:45:25", "remaining_time": "0:10:42", "throughput": 5536.88, "total_tokens": 15088336} +{"current_steps": 30660, "total_steps": 37885, "loss": 0.0, "lr": 2.1357768848979518e-07, "epoch": 4.046456381153491, "percentage": 80.93, "elapsed_time": "0:45:25", "remaining_time": "0:10:42", "throughput": 5537.12, "total_tokens": 15090832} +{"current_steps": 30665, "total_steps": 37885, "loss": 0.0162, "lr": 2.1329320369978532e-07, "epoch": 4.047116272931239, "percentage": 80.94, "elapsed_time": "0:45:25", "remaining_time": "0:10:41", "throughput": 5537.38, "total_tokens": 15093392} +{"current_steps": 30670, "total_steps": 37885, "loss": 0.0, "lr": 2.130088858825967e-07, "epoch": 4.047776164708988, "percentage": 80.96, "elapsed_time": "0:45:26", "remaining_time": "0:10:41", "throughput": 5537.71, "total_tokens": 15096144} +{"current_steps": 30675, "total_steps": 37885, "loss": 0.0028, "lr": 2.1272473509857313e-07, "epoch": 4.048436056486736, "percentage": 80.97, "elapsed_time": "0:45:26", "remaining_time": "0:10:40", "throughput": 5537.89, "total_tokens": 15098512} +{"current_steps": 30680, "total_steps": 37885, "loss": 0.0188, "lr": 2.1244075140802298e-07, "epoch": 4.049095948264485, "percentage": 80.98, "elapsed_time": "0:45:26", "remaining_time": "0:10:40", "throughput": 5538.13, "total_tokens": 15101008} +{"current_steps": 30685, "total_steps": 37885, "loss": 0.0352, "lr": 2.1215693487122078e-07, "epoch": 4.049755840042233, "percentage": 81.0, "elapsed_time": "0:45:27", "remaining_time": "0:10:39", "throughput": 5538.41, "total_tokens": 15103632} +{"current_steps": 30690, "total_steps": 37885, "loss": 0.0001, "lr": 2.118732855484038e-07, "epoch": 4.050415731819982, "percentage": 81.01, "elapsed_time": "0:45:27", "remaining_time": "0:10:39", "throughput": 5538.65, "total_tokens": 15106128} +{"current_steps": 30695, "total_steps": 37885, "loss": 0.0564, "lr": 2.1158980349977496e-07, "epoch": 4.05107562359773, "percentage": 81.02, "elapsed_time": "0:45:27", "remaining_time": "0:10:38", "throughput": 5538.84, "total_tokens": 15108496} +{"current_steps": 30700, "total_steps": 37885, "loss": 0.0, "lr": 2.1130648878550095e-07, "epoch": 4.051735515375478, "percentage": 81.03, "elapsed_time": "0:45:28", "remaining_time": "0:10:38", "throughput": 5539.14, "total_tokens": 15111184} +{"current_steps": 30705, "total_steps": 37885, "loss": 0.0468, "lr": 2.1102334146571342e-07, "epoch": 4.052395407153226, "percentage": 81.05, "elapsed_time": "0:45:28", "remaining_time": "0:10:38", "throughput": 5539.47, "total_tokens": 15114000} +{"current_steps": 30710, "total_steps": 37885, "loss": 0.0, "lr": 2.1074036160050867e-07, "epoch": 4.053055298930976, "percentage": 81.06, "elapsed_time": "0:45:28", "remaining_time": "0:10:37", "throughput": 5539.62, "total_tokens": 15116240} +{"current_steps": 30715, "total_steps": 37885, "loss": 0.0352, "lr": 2.104575492499464e-07, "epoch": 4.053715190708724, "percentage": 81.07, "elapsed_time": "0:45:29", "remaining_time": "0:10:37", "throughput": 5539.89, "total_tokens": 15118864} +{"current_steps": 30720, "total_steps": 37885, "loss": 0.0, "lr": 2.1017490447405195e-07, "epoch": 4.054375082486472, "percentage": 81.09, "elapsed_time": "0:45:29", "remaining_time": "0:10:36", "throughput": 5540.19, "total_tokens": 15121552} +{"current_steps": 30725, "total_steps": 37885, "loss": 0.0, "lr": 2.0989242733281486e-07, "epoch": 4.05503497426422, "percentage": 81.1, "elapsed_time": "0:45:29", "remaining_time": "0:10:36", "throughput": 5540.33, "total_tokens": 15123792} +{"current_steps": 30730, "total_steps": 37885, "loss": 0.0002, "lr": 2.0961011788618833e-07, "epoch": 4.055694866041969, "percentage": 81.11, "elapsed_time": "0:45:30", "remaining_time": "0:10:35", "throughput": 5540.53, "total_tokens": 15126224} +{"current_steps": 30735, "total_steps": 37885, "loss": 0.0, "lr": 2.0932797619409058e-07, "epoch": 4.056354757819718, "percentage": 81.13, "elapsed_time": "0:45:30", "remaining_time": "0:10:35", "throughput": 5540.84, "total_tokens": 15128912} +{"current_steps": 30740, "total_steps": 37885, "loss": 0.0, "lr": 2.0904600231640435e-07, "epoch": 4.057014649597466, "percentage": 81.14, "elapsed_time": "0:45:30", "remaining_time": "0:10:34", "throughput": 5541.08, "total_tokens": 15131472} +{"current_steps": 30745, "total_steps": 37885, "loss": 0.0, "lr": 2.0876419631297682e-07, "epoch": 4.057674541375214, "percentage": 81.15, "elapsed_time": "0:45:31", "remaining_time": "0:10:34", "throughput": 5541.25, "total_tokens": 15133776} +{"current_steps": 30750, "total_steps": 37885, "loss": 0.0004, "lr": 2.084825582436186e-07, "epoch": 4.058334433152963, "percentage": 81.17, "elapsed_time": "0:45:31", "remaining_time": "0:10:33", "throughput": 5541.52, "total_tokens": 15136400} +{"current_steps": 30755, "total_steps": 37885, "loss": 0.0, "lr": 2.0820108816810565e-07, "epoch": 4.058994324930711, "percentage": 81.18, "elapsed_time": "0:45:31", "remaining_time": "0:10:33", "throughput": 5541.72, "total_tokens": 15138832} +{"current_steps": 30760, "total_steps": 37885, "loss": 0.0, "lr": 2.0791978614617834e-07, "epoch": 4.05965421670846, "percentage": 81.19, "elapsed_time": "0:45:32", "remaining_time": "0:10:32", "throughput": 5542.01, "total_tokens": 15141520} +{"current_steps": 30765, "total_steps": 37885, "loss": 0.0, "lr": 2.0763865223754028e-07, "epoch": 4.060314108486208, "percentage": 81.21, "elapsed_time": "0:45:32", "remaining_time": "0:10:32", "throughput": 5542.14, "total_tokens": 15143760} +{"current_steps": 30770, "total_steps": 37885, "loss": 0.0, "lr": 2.0735768650186058e-07, "epoch": 4.060974000263957, "percentage": 81.22, "elapsed_time": "0:45:32", "remaining_time": "0:10:31", "throughput": 5542.3, "total_tokens": 15146128} +{"current_steps": 30775, "total_steps": 37885, "loss": 0.0032, "lr": 2.0707688899877195e-07, "epoch": 4.061633892041705, "percentage": 81.23, "elapsed_time": "0:45:33", "remaining_time": "0:10:31", "throughput": 5542.57, "total_tokens": 15148752} +{"current_steps": 30780, "total_steps": 37885, "loss": 0.0164, "lr": 2.0679625978787196e-07, "epoch": 4.062293783819453, "percentage": 81.25, "elapsed_time": "0:45:33", "remaining_time": "0:10:30", "throughput": 5542.66, "total_tokens": 15150928} +{"current_steps": 30785, "total_steps": 37885, "loss": 0.0, "lr": 2.0651579892872173e-07, "epoch": 4.062953675597202, "percentage": 81.26, "elapsed_time": "0:45:33", "remaining_time": "0:10:30", "throughput": 5542.87, "total_tokens": 15153424} +{"current_steps": 30790, "total_steps": 37885, "loss": 0.0, "lr": 2.0623550648084719e-07, "epoch": 4.063613567374951, "percentage": 81.27, "elapsed_time": "0:45:34", "remaining_time": "0:10:30", "throughput": 5543.14, "total_tokens": 15156112} +{"current_steps": 30795, "total_steps": 37885, "loss": 0.0, "lr": 2.0595538250373868e-07, "epoch": 4.064273459152699, "percentage": 81.29, "elapsed_time": "0:45:34", "remaining_time": "0:10:29", "throughput": 5543.35, "total_tokens": 15158608} +{"current_steps": 30800, "total_steps": 37885, "loss": 0.0, "lr": 2.0567542705684992e-07, "epoch": 4.064933350930447, "percentage": 81.3, "elapsed_time": "0:45:34", "remaining_time": "0:10:29", "throughput": 5543.55, "total_tokens": 15161040} +{"current_steps": 30805, "total_steps": 37885, "loss": 0.0, "lr": 2.0539564019959965e-07, "epoch": 4.0655932427081956, "percentage": 81.31, "elapsed_time": "0:45:35", "remaining_time": "0:10:28", "throughput": 5543.84, "total_tokens": 15163792} +{"current_steps": 30810, "total_steps": 37885, "loss": 0.0, "lr": 2.05116021991371e-07, "epoch": 4.066253134485945, "percentage": 81.33, "elapsed_time": "0:45:35", "remaining_time": "0:10:28", "throughput": 5544.08, "total_tokens": 15166352} +{"current_steps": 30815, "total_steps": 37885, "loss": 0.0, "lr": 2.0483657249151043e-07, "epoch": 4.066913026263693, "percentage": 81.34, "elapsed_time": "0:45:35", "remaining_time": "0:10:27", "throughput": 5544.2, "total_tokens": 15168592} +{"current_steps": 30820, "total_steps": 37885, "loss": 0.0002, "lr": 2.045572917593291e-07, "epoch": 4.067572918041441, "percentage": 81.35, "elapsed_time": "0:45:36", "remaining_time": "0:10:27", "throughput": 5544.51, "total_tokens": 15171344} +{"current_steps": 30825, "total_steps": 37885, "loss": 0.0, "lr": 2.0427817985410245e-07, "epoch": 4.0682328098191896, "percentage": 81.36, "elapsed_time": "0:45:36", "remaining_time": "0:10:26", "throughput": 5544.7, "total_tokens": 15173776} +{"current_steps": 30830, "total_steps": 37885, "loss": 0.0007, "lr": 2.0399923683507026e-07, "epoch": 4.068892701596938, "percentage": 81.38, "elapsed_time": "0:45:36", "remaining_time": "0:10:26", "throughput": 5544.9, "total_tokens": 15176208} +{"current_steps": 30835, "total_steps": 37885, "loss": 0.0, "lr": 2.0372046276143596e-07, "epoch": 4.069552593374686, "percentage": 81.39, "elapsed_time": "0:45:37", "remaining_time": "0:10:25", "throughput": 5545.07, "total_tokens": 15178576} +{"current_steps": 30840, "total_steps": 37885, "loss": 0.0, "lr": 2.0344185769236654e-07, "epoch": 4.070212485152435, "percentage": 81.4, "elapsed_time": "0:45:37", "remaining_time": "0:10:25", "throughput": 5545.17, "total_tokens": 15180752} +{"current_steps": 30845, "total_steps": 37885, "loss": 0.0001, "lr": 2.0316342168699517e-07, "epoch": 4.070872376930184, "percentage": 81.42, "elapsed_time": "0:45:37", "remaining_time": "0:10:24", "throughput": 5545.38, "total_tokens": 15183248} +{"current_steps": 30850, "total_steps": 37885, "loss": 0.0001, "lr": 2.0288515480441714e-07, "epoch": 4.071532268707932, "percentage": 81.43, "elapsed_time": "0:45:38", "remaining_time": "0:10:24", "throughput": 5545.67, "total_tokens": 15185936} +{"current_steps": 30855, "total_steps": 37885, "loss": 0.061, "lr": 2.0260705710369296e-07, "epoch": 4.07219216048568, "percentage": 81.44, "elapsed_time": "0:45:38", "remaining_time": "0:10:23", "throughput": 5545.82, "total_tokens": 15188176} +{"current_steps": 30860, "total_steps": 37885, "loss": 0.0, "lr": 2.0232912864384644e-07, "epoch": 4.072852052263428, "percentage": 81.46, "elapsed_time": "0:45:39", "remaining_time": "0:10:23", "throughput": 5545.95, "total_tokens": 15190416} +{"current_steps": 30865, "total_steps": 37885, "loss": 0.0003, "lr": 2.0205136948386604e-07, "epoch": 4.073511944041178, "percentage": 81.47, "elapsed_time": "0:45:39", "remaining_time": "0:10:23", "throughput": 5546.16, "total_tokens": 15192848} +{"current_steps": 30870, "total_steps": 37885, "loss": 0.0, "lr": 2.0177377968270438e-07, "epoch": 4.074171835818926, "percentage": 81.48, "elapsed_time": "0:45:39", "remaining_time": "0:10:22", "throughput": 5546.52, "total_tokens": 15195728} +{"current_steps": 30875, "total_steps": 37885, "loss": 0.0, "lr": 2.0149635929927723e-07, "epoch": 4.074831727596674, "percentage": 81.5, "elapsed_time": "0:45:40", "remaining_time": "0:10:22", "throughput": 5546.81, "total_tokens": 15198416} +{"current_steps": 30880, "total_steps": 37885, "loss": 0.0, "lr": 2.0121910839246593e-07, "epoch": 4.075491619374422, "percentage": 81.51, "elapsed_time": "0:45:40", "remaining_time": "0:10:21", "throughput": 5547.04, "total_tokens": 15200912} +{"current_steps": 30885, "total_steps": 37885, "loss": 0.0, "lr": 2.0094202702111462e-07, "epoch": 4.076151511152171, "percentage": 81.52, "elapsed_time": "0:45:40", "remaining_time": "0:10:21", "throughput": 5547.21, "total_tokens": 15203280} +{"current_steps": 30890, "total_steps": 37885, "loss": 0.0, "lr": 2.006651152440315e-07, "epoch": 4.07681140292992, "percentage": 81.54, "elapsed_time": "0:45:41", "remaining_time": "0:10:20", "throughput": 5547.46, "total_tokens": 15205840} +{"current_steps": 30895, "total_steps": 37885, "loss": 0.0, "lr": 2.0038837311998945e-07, "epoch": 4.077471294707668, "percentage": 81.55, "elapsed_time": "0:45:41", "remaining_time": "0:10:20", "throughput": 5547.64, "total_tokens": 15208208} +{"current_steps": 30900, "total_steps": 37885, "loss": 0.0, "lr": 2.0011180070772472e-07, "epoch": 4.078131186485416, "percentage": 81.56, "elapsed_time": "0:45:41", "remaining_time": "0:10:19", "throughput": 5547.82, "total_tokens": 15210576} +{"current_steps": 30905, "total_steps": 37885, "loss": 0.0001, "lr": 1.998353980659383e-07, "epoch": 4.078791078263165, "percentage": 81.58, "elapsed_time": "0:45:42", "remaining_time": "0:10:19", "throughput": 5548.04, "total_tokens": 15213072} +{"current_steps": 30910, "total_steps": 37885, "loss": 0.0, "lr": 1.9955916525329396e-07, "epoch": 4.079450970040913, "percentage": 81.59, "elapsed_time": "0:45:42", "remaining_time": "0:10:18", "throughput": 5548.25, "total_tokens": 15215504} +{"current_steps": 30915, "total_steps": 37885, "loss": 0.0013, "lr": 1.992831023284205e-07, "epoch": 4.080110861818662, "percentage": 81.6, "elapsed_time": "0:45:42", "remaining_time": "0:10:18", "throughput": 5548.37, "total_tokens": 15217680} +{"current_steps": 30920, "total_steps": 37885, "loss": 0.0, "lr": 1.9900720934991055e-07, "epoch": 4.08077075359641, "percentage": 81.62, "elapsed_time": "0:45:43", "remaining_time": "0:10:17", "throughput": 5548.6, "total_tokens": 15220176} +{"current_steps": 30925, "total_steps": 37885, "loss": 0.0, "lr": 1.9873148637631977e-07, "epoch": 4.081430645374159, "percentage": 81.63, "elapsed_time": "0:45:43", "remaining_time": "0:10:17", "throughput": 5548.81, "total_tokens": 15222608} +{"current_steps": 30930, "total_steps": 37885, "loss": 0.13, "lr": 1.9845593346616861e-07, "epoch": 4.082090537151907, "percentage": 81.64, "elapsed_time": "0:45:43", "remaining_time": "0:10:16", "throughput": 5548.97, "total_tokens": 15224912} +{"current_steps": 30935, "total_steps": 37885, "loss": 0.0, "lr": 1.981805506779416e-07, "epoch": 4.082750428929655, "percentage": 81.66, "elapsed_time": "0:45:44", "remaining_time": "0:10:16", "throughput": 5549.15, "total_tokens": 15227280} +{"current_steps": 30940, "total_steps": 37885, "loss": 0.0, "lr": 1.9790533807008613e-07, "epoch": 4.083410320707404, "percentage": 81.67, "elapsed_time": "0:45:44", "remaining_time": "0:10:16", "throughput": 5549.3, "total_tokens": 15229520} +{"current_steps": 30945, "total_steps": 37885, "loss": 0.0, "lr": 1.976302957010143e-07, "epoch": 4.084070212485153, "percentage": 81.68, "elapsed_time": "0:45:44", "remaining_time": "0:10:15", "throughput": 5549.52, "total_tokens": 15232016} +{"current_steps": 30950, "total_steps": 37885, "loss": 0.0188, "lr": 1.9735542362910197e-07, "epoch": 4.084730104262901, "percentage": 81.69, "elapsed_time": "0:45:45", "remaining_time": "0:10:15", "throughput": 5549.69, "total_tokens": 15234320} +{"current_steps": 30955, "total_steps": 37885, "loss": 0.0998, "lr": 1.9708072191268886e-07, "epoch": 4.085389996040649, "percentage": 81.71, "elapsed_time": "0:45:45", "remaining_time": "0:10:14", "throughput": 5549.91, "total_tokens": 15236752} +{"current_steps": 30960, "total_steps": 37885, "loss": 0.0001, "lr": 1.9680619061007796e-07, "epoch": 4.0860498878183975, "percentage": 81.72, "elapsed_time": "0:45:45", "remaining_time": "0:10:14", "throughput": 5550.14, "total_tokens": 15239248} +{"current_steps": 30965, "total_steps": 37885, "loss": 0.0005, "lr": 1.9653182977953699e-07, "epoch": 4.086709779596147, "percentage": 81.73, "elapsed_time": "0:45:46", "remaining_time": "0:10:13", "throughput": 5550.36, "total_tokens": 15241680} +{"current_steps": 30970, "total_steps": 37885, "loss": 0.0001, "lr": 1.9625763947929698e-07, "epoch": 4.087369671373895, "percentage": 81.75, "elapsed_time": "0:45:46", "remaining_time": "0:10:13", "throughput": 5550.6, "total_tokens": 15244176} +{"current_steps": 30975, "total_steps": 37885, "loss": 0.0, "lr": 1.9598361976755252e-07, "epoch": 4.088029563151643, "percentage": 81.76, "elapsed_time": "0:45:46", "remaining_time": "0:10:12", "throughput": 5550.75, "total_tokens": 15246416} +{"current_steps": 30980, "total_steps": 37885, "loss": 0.0, "lr": 1.9570977070246254e-07, "epoch": 4.0886894549293915, "percentage": 81.77, "elapsed_time": "0:45:47", "remaining_time": "0:10:12", "throughput": 5550.9, "total_tokens": 15248656} +{"current_steps": 30985, "total_steps": 37885, "loss": 0.0066, "lr": 1.9543609234214987e-07, "epoch": 4.08934934670714, "percentage": 81.79, "elapsed_time": "0:45:47", "remaining_time": "0:10:11", "throughput": 5551.16, "total_tokens": 15251216} +{"current_steps": 30990, "total_steps": 37885, "loss": 0.0, "lr": 1.9516258474470005e-07, "epoch": 4.090009238484888, "percentage": 81.8, "elapsed_time": "0:45:47", "remaining_time": "0:10:11", "throughput": 5551.44, "total_tokens": 15253840} +{"current_steps": 30995, "total_steps": 37885, "loss": 0.0, "lr": 1.948892479681634e-07, "epoch": 4.090669130262637, "percentage": 81.81, "elapsed_time": "0:45:48", "remaining_time": "0:10:10", "throughput": 5551.7, "total_tokens": 15256400} +{"current_steps": 31000, "total_steps": 37885, "loss": 0.0, "lr": 1.946160820705538e-07, "epoch": 4.0913290220403855, "percentage": 81.83, "elapsed_time": "0:45:48", "remaining_time": "0:10:10", "throughput": 5551.86, "total_tokens": 15258640} +{"current_steps": 31005, "total_steps": 37885, "loss": 0.0176, "lr": 1.9434308710984893e-07, "epoch": 4.091988913818134, "percentage": 81.84, "elapsed_time": "0:45:48", "remaining_time": "0:10:09", "throughput": 5552.14, "total_tokens": 15261264} +{"current_steps": 31010, "total_steps": 37885, "loss": 0.0, "lr": 1.9407026314398966e-07, "epoch": 4.092648805595882, "percentage": 81.85, "elapsed_time": "0:45:49", "remaining_time": "0:10:09", "throughput": 5552.36, "total_tokens": 15263696} +{"current_steps": 31015, "total_steps": 37885, "loss": 0.0066, "lr": 1.9379761023088047e-07, "epoch": 4.09330869737363, "percentage": 81.87, "elapsed_time": "0:45:49", "remaining_time": "0:10:09", "throughput": 5552.62, "total_tokens": 15266256} +{"current_steps": 31020, "total_steps": 37885, "loss": 0.0, "lr": 1.9352512842839096e-07, "epoch": 4.0939685891513795, "percentage": 81.88, "elapsed_time": "0:45:49", "remaining_time": "0:10:08", "throughput": 5552.88, "total_tokens": 15268816} +{"current_steps": 31025, "total_steps": 37885, "loss": 0.0322, "lr": 1.9325281779435265e-07, "epoch": 4.094628480929128, "percentage": 81.89, "elapsed_time": "0:45:50", "remaining_time": "0:10:08", "throughput": 5553.1, "total_tokens": 15271248} +{"current_steps": 31030, "total_steps": 37885, "loss": 0.0, "lr": 1.9298067838656196e-07, "epoch": 4.095288372706876, "percentage": 81.91, "elapsed_time": "0:45:50", "remaining_time": "0:10:07", "throughput": 5553.41, "total_tokens": 15273936} +{"current_steps": 31035, "total_steps": 37885, "loss": 0.0, "lr": 1.9270871026277812e-07, "epoch": 4.095948264484624, "percentage": 81.92, "elapsed_time": "0:45:50", "remaining_time": "0:10:07", "throughput": 5553.69, "total_tokens": 15276560} +{"current_steps": 31040, "total_steps": 37885, "loss": 0.0, "lr": 1.9243691348072454e-07, "epoch": 4.096608156262373, "percentage": 81.93, "elapsed_time": "0:45:51", "remaining_time": "0:10:06", "throughput": 5553.97, "total_tokens": 15279184} +{"current_steps": 31045, "total_steps": 37885, "loss": 0.0, "lr": 1.9216528809808841e-07, "epoch": 4.097268048040122, "percentage": 81.95, "elapsed_time": "0:45:51", "remaining_time": "0:10:06", "throughput": 5554.13, "total_tokens": 15281424} +{"current_steps": 31050, "total_steps": 37885, "loss": 0.0, "lr": 1.918938341725198e-07, "epoch": 4.09792793981787, "percentage": 81.96, "elapsed_time": "0:45:51", "remaining_time": "0:10:05", "throughput": 5554.39, "total_tokens": 15283984} +{"current_steps": 31055, "total_steps": 37885, "loss": 0.0004, "lr": 1.91622551761633e-07, "epoch": 4.098587831595618, "percentage": 81.97, "elapsed_time": "0:45:52", "remaining_time": "0:10:05", "throughput": 5554.66, "total_tokens": 15286544} +{"current_steps": 31060, "total_steps": 37885, "loss": 0.0001, "lr": 1.9135144092300604e-07, "epoch": 4.099247723373367, "percentage": 81.98, "elapsed_time": "0:45:52", "remaining_time": "0:10:04", "throughput": 5554.9, "total_tokens": 15289040} +{"current_steps": 31065, "total_steps": 37885, "loss": 0.0, "lr": 1.9108050171417967e-07, "epoch": 4.099907615151115, "percentage": 82.0, "elapsed_time": "0:45:52", "remaining_time": "0:10:04", "throughput": 5555.2, "total_tokens": 15291728} +{"current_steps": 31070, "total_steps": 37885, "loss": 0.0, "lr": 1.9080973419265922e-07, "epoch": 4.100567506928864, "percentage": 82.01, "elapsed_time": "0:45:53", "remaining_time": "0:10:03", "throughput": 5555.42, "total_tokens": 15294160} +{"current_steps": 31075, "total_steps": 37885, "loss": 0.0095, "lr": 1.9053913841591285e-07, "epoch": 4.101227398706612, "percentage": 82.02, "elapsed_time": "0:45:53", "remaining_time": "0:10:03", "throughput": 5555.61, "total_tokens": 15296528} +{"current_steps": 31080, "total_steps": 37885, "loss": 0.0, "lr": 1.9026871444137306e-07, "epoch": 4.101887290484361, "percentage": 82.04, "elapsed_time": "0:45:53", "remaining_time": "0:10:02", "throughput": 5555.81, "total_tokens": 15298896} +{"current_steps": 31085, "total_steps": 37885, "loss": 0.0, "lr": 1.8999846232643468e-07, "epoch": 4.102547182262109, "percentage": 82.05, "elapsed_time": "0:45:54", "remaining_time": "0:10:02", "throughput": 5556.09, "total_tokens": 15301584} +{"current_steps": 31090, "total_steps": 37885, "loss": 0.0, "lr": 1.897283821284571e-07, "epoch": 4.103207074039857, "percentage": 82.06, "elapsed_time": "0:45:54", "remaining_time": "0:10:01", "throughput": 5556.36, "total_tokens": 15304208} +{"current_steps": 31095, "total_steps": 37885, "loss": 0.0226, "lr": 1.894584739047631e-07, "epoch": 4.103866965817606, "percentage": 82.08, "elapsed_time": "0:45:54", "remaining_time": "0:10:01", "throughput": 5556.61, "total_tokens": 15306768} +{"current_steps": 31100, "total_steps": 37885, "loss": 0.0, "lr": 1.8918873771263842e-07, "epoch": 4.104526857595355, "percentage": 82.09, "elapsed_time": "0:45:55", "remaining_time": "0:10:01", "throughput": 5556.81, "total_tokens": 15309200} +{"current_steps": 31105, "total_steps": 37885, "loss": 0.0, "lr": 1.8891917360933262e-07, "epoch": 4.105186749373103, "percentage": 82.1, "elapsed_time": "0:45:55", "remaining_time": "0:10:00", "throughput": 5557.01, "total_tokens": 15311632} +{"current_steps": 31110, "total_steps": 37885, "loss": 0.0, "lr": 1.8864978165205892e-07, "epoch": 4.105846641150851, "percentage": 82.12, "elapsed_time": "0:45:55", "remaining_time": "0:10:00", "throughput": 5557.17, "total_tokens": 15313936} +{"current_steps": 31115, "total_steps": 37885, "loss": 0.0, "lr": 1.8838056189799388e-07, "epoch": 4.1065065329285995, "percentage": 82.13, "elapsed_time": "0:45:56", "remaining_time": "0:09:59", "throughput": 5557.37, "total_tokens": 15316368} +{"current_steps": 31120, "total_steps": 37885, "loss": 0.0004, "lr": 1.881115144042771e-07, "epoch": 4.107166424706348, "percentage": 82.14, "elapsed_time": "0:45:56", "remaining_time": "0:09:59", "throughput": 5557.55, "total_tokens": 15318736} +{"current_steps": 31125, "total_steps": 37885, "loss": 0.0001, "lr": 1.8784263922801212e-07, "epoch": 4.107826316484097, "percentage": 82.16, "elapsed_time": "0:45:56", "remaining_time": "0:09:58", "throughput": 5557.81, "total_tokens": 15321360} +{"current_steps": 31130, "total_steps": 37885, "loss": 0.0001, "lr": 1.8757393642626606e-07, "epoch": 4.108486208261845, "percentage": 82.17, "elapsed_time": "0:45:57", "remaining_time": "0:09:58", "throughput": 5557.97, "total_tokens": 15323664} +{"current_steps": 31135, "total_steps": 37885, "loss": 0.0, "lr": 1.873054060560686e-07, "epoch": 4.1091461000395935, "percentage": 82.18, "elapsed_time": "0:45:57", "remaining_time": "0:09:57", "throughput": 5558.11, "total_tokens": 15325904} +{"current_steps": 31140, "total_steps": 37885, "loss": 0.0, "lr": 1.870370481744137e-07, "epoch": 4.109805991817342, "percentage": 82.2, "elapsed_time": "0:45:57", "remaining_time": "0:09:57", "throughput": 5558.27, "total_tokens": 15328208} +{"current_steps": 31145, "total_steps": 37885, "loss": 0.0, "lr": 1.8676886283825843e-07, "epoch": 4.11046588359509, "percentage": 82.21, "elapsed_time": "0:45:58", "remaining_time": "0:09:56", "throughput": 5558.5, "total_tokens": 15330704} +{"current_steps": 31150, "total_steps": 37885, "loss": 0.0, "lr": 1.8650085010452288e-07, "epoch": 4.111125775372839, "percentage": 82.22, "elapsed_time": "0:45:58", "remaining_time": "0:09:56", "throughput": 5558.67, "total_tokens": 15333072} +{"current_steps": 31155, "total_steps": 37885, "loss": 0.0011, "lr": 1.8623301003009106e-07, "epoch": 4.1117856671505875, "percentage": 82.24, "elapsed_time": "0:45:58", "remaining_time": "0:09:55", "throughput": 5558.84, "total_tokens": 15335440} +{"current_steps": 31160, "total_steps": 37885, "loss": 0.0001, "lr": 1.8596534267180998e-07, "epoch": 4.112445558928336, "percentage": 82.25, "elapsed_time": "0:45:59", "remaining_time": "0:09:55", "throughput": 5559.19, "total_tokens": 15338320} +{"current_steps": 31165, "total_steps": 37885, "loss": 0.0, "lr": 1.8569784808649035e-07, "epoch": 4.113105450706084, "percentage": 82.26, "elapsed_time": "0:45:59", "remaining_time": "0:09:55", "throughput": 5559.49, "total_tokens": 15341072} +{"current_steps": 31170, "total_steps": 37885, "loss": 0.0294, "lr": 1.8543052633090582e-07, "epoch": 4.113765342483832, "percentage": 82.28, "elapsed_time": "0:45:59", "remaining_time": "0:09:54", "throughput": 5559.68, "total_tokens": 15343504} +{"current_steps": 31175, "total_steps": 37885, "loss": 0.0266, "lr": 1.8516337746179288e-07, "epoch": 4.1144252342615815, "percentage": 82.29, "elapsed_time": "0:46:00", "remaining_time": "0:09:54", "throughput": 5559.94, "total_tokens": 15346128} +{"current_steps": 31180, "total_steps": 37885, "loss": 0.0, "lr": 1.8489640153585296e-07, "epoch": 4.11508512603933, "percentage": 82.3, "elapsed_time": "0:46:00", "remaining_time": "0:09:53", "throughput": 5560.21, "total_tokens": 15348752} +{"current_steps": 31185, "total_steps": 37885, "loss": 0.0, "lr": 1.8462959860974914e-07, "epoch": 4.115745017817078, "percentage": 82.31, "elapsed_time": "0:46:00", "remaining_time": "0:09:53", "throughput": 5560.34, "total_tokens": 15350992} +{"current_steps": 31190, "total_steps": 37885, "loss": 0.0, "lr": 1.843629687401085e-07, "epoch": 4.116404909594826, "percentage": 82.33, "elapsed_time": "0:46:01", "remaining_time": "0:09:52", "throughput": 5560.51, "total_tokens": 15353360} +{"current_steps": 31195, "total_steps": 37885, "loss": 0.0, "lr": 1.840965119835216e-07, "epoch": 4.117064801372575, "percentage": 82.34, "elapsed_time": "0:46:01", "remaining_time": "0:09:52", "throughput": 5560.73, "total_tokens": 15355856} +{"current_steps": 31200, "total_steps": 37885, "loss": 0.0, "lr": 1.838302283965415e-07, "epoch": 4.117724693150324, "percentage": 82.35, "elapsed_time": "0:46:01", "remaining_time": "0:09:51", "throughput": 5560.93, "total_tokens": 15358288} +{"current_steps": 31205, "total_steps": 37885, "loss": 0.0, "lr": 1.835641180356855e-07, "epoch": 4.118384584928072, "percentage": 82.37, "elapsed_time": "0:46:02", "remaining_time": "0:09:51", "throughput": 5561.09, "total_tokens": 15360592} +{"current_steps": 31210, "total_steps": 37885, "loss": 0.0001, "lr": 1.8329818095743265e-07, "epoch": 4.11904447670582, "percentage": 82.38, "elapsed_time": "0:46:02", "remaining_time": "0:09:50", "throughput": 5561.24, "total_tokens": 15362896} +{"current_steps": 31215, "total_steps": 37885, "loss": 0.0, "lr": 1.8303241721822737e-07, "epoch": 4.119704368483569, "percentage": 82.39, "elapsed_time": "0:46:02", "remaining_time": "0:09:50", "throughput": 5561.46, "total_tokens": 15365328} +{"current_steps": 31220, "total_steps": 37885, "loss": 0.0426, "lr": 1.8276682687447553e-07, "epoch": 4.120364260261317, "percentage": 82.41, "elapsed_time": "0:46:03", "remaining_time": "0:09:49", "throughput": 5561.64, "total_tokens": 15367632} +{"current_steps": 31225, "total_steps": 37885, "loss": 0.0, "lr": 1.825014099825466e-07, "epoch": 4.121024152039066, "percentage": 82.42, "elapsed_time": "0:46:03", "remaining_time": "0:09:49", "throughput": 5561.88, "total_tokens": 15370128} +{"current_steps": 31230, "total_steps": 37885, "loss": 0.0, "lr": 1.822361665987734e-07, "epoch": 4.121684043816814, "percentage": 82.43, "elapsed_time": "0:46:03", "remaining_time": "0:09:48", "throughput": 5562.14, "total_tokens": 15372688} +{"current_steps": 31235, "total_steps": 37885, "loss": 0.0, "lr": 1.819710967794521e-07, "epoch": 4.122343935594563, "percentage": 82.45, "elapsed_time": "0:46:04", "remaining_time": "0:09:48", "throughput": 5562.34, "total_tokens": 15375056} +{"current_steps": 31240, "total_steps": 37885, "loss": 0.0, "lr": 1.8170620058084208e-07, "epoch": 4.123003827372311, "percentage": 82.46, "elapsed_time": "0:46:04", "remaining_time": "0:09:48", "throughput": 5562.58, "total_tokens": 15377552} +{"current_steps": 31245, "total_steps": 37885, "loss": 0.0, "lr": 1.814414780591651e-07, "epoch": 4.123663719150059, "percentage": 82.47, "elapsed_time": "0:46:04", "remaining_time": "0:09:47", "throughput": 5562.78, "total_tokens": 15379920} +{"current_steps": 31250, "total_steps": 37885, "loss": 0.0, "lr": 1.811769292706068e-07, "epoch": 4.124323610927807, "percentage": 82.49, "elapsed_time": "0:46:05", "remaining_time": "0:09:47", "throughput": 5562.96, "total_tokens": 15382224} +{"current_steps": 31255, "total_steps": 37885, "loss": 0.0, "lr": 1.8091255427131614e-07, "epoch": 4.124983502705557, "percentage": 82.5, "elapsed_time": "0:46:05", "remaining_time": "0:09:46", "throughput": 5563.26, "total_tokens": 15384912} +{"current_steps": 31260, "total_steps": 37885, "loss": 0.0, "lr": 1.8064835311740422e-07, "epoch": 4.125643394483305, "percentage": 82.51, "elapsed_time": "0:46:05", "remaining_time": "0:09:46", "throughput": 5563.43, "total_tokens": 15387216} +{"current_steps": 31265, "total_steps": 37885, "loss": 0.0035, "lr": 1.80384325864946e-07, "epoch": 4.126303286261053, "percentage": 82.53, "elapsed_time": "0:46:06", "remaining_time": "0:09:45", "throughput": 5563.64, "total_tokens": 15389648} +{"current_steps": 31270, "total_steps": 37885, "loss": 0.0001, "lr": 1.8012047256997977e-07, "epoch": 4.126963178038801, "percentage": 82.54, "elapsed_time": "0:46:06", "remaining_time": "0:09:45", "throughput": 5563.92, "total_tokens": 15392272} +{"current_steps": 31275, "total_steps": 37885, "loss": 0.0, "lr": 1.798567932885059e-07, "epoch": 4.12762306981655, "percentage": 82.55, "elapsed_time": "0:46:06", "remaining_time": "0:09:44", "throughput": 5564.2, "total_tokens": 15394896} +{"current_steps": 31280, "total_steps": 37885, "loss": 0.0343, "lr": 1.7959328807648856e-07, "epoch": 4.128282961594299, "percentage": 82.57, "elapsed_time": "0:46:07", "remaining_time": "0:09:44", "throughput": 5564.51, "total_tokens": 15397584} +{"current_steps": 31285, "total_steps": 37885, "loss": 0.0004, "lr": 1.7932995698985486e-07, "epoch": 4.128942853372047, "percentage": 82.58, "elapsed_time": "0:46:07", "remaining_time": "0:09:43", "throughput": 5564.77, "total_tokens": 15400144} +{"current_steps": 31290, "total_steps": 37885, "loss": 0.0, "lr": 1.7906680008449536e-07, "epoch": 4.129602745149795, "percentage": 82.59, "elapsed_time": "0:46:07", "remaining_time": "0:09:43", "throughput": 5565.07, "total_tokens": 15402832} +{"current_steps": 31295, "total_steps": 37885, "loss": 0.0002, "lr": 1.788038174162625e-07, "epoch": 4.130262636927544, "percentage": 82.61, "elapsed_time": "0:46:08", "remaining_time": "0:09:42", "throughput": 5565.3, "total_tokens": 15405328} +{"current_steps": 31300, "total_steps": 37885, "loss": 0.0005, "lr": 1.785410090409727e-07, "epoch": 4.130922528705292, "percentage": 82.62, "elapsed_time": "0:46:08", "remaining_time": "0:09:42", "throughput": 5565.57, "total_tokens": 15407952} +{"current_steps": 31305, "total_steps": 37885, "loss": 0.0001, "lr": 1.7827837501440556e-07, "epoch": 4.131582420483041, "percentage": 82.63, "elapsed_time": "0:46:08", "remaining_time": "0:09:41", "throughput": 5565.77, "total_tokens": 15410320} +{"current_steps": 31310, "total_steps": 37885, "loss": 0.0001, "lr": 1.7801591539230255e-07, "epoch": 4.132242312260789, "percentage": 82.64, "elapsed_time": "0:46:09", "remaining_time": "0:09:41", "throughput": 5565.97, "total_tokens": 15412688} +{"current_steps": 31315, "total_steps": 37885, "loss": 0.0338, "lr": 1.7775363023036916e-07, "epoch": 4.132902204038538, "percentage": 82.66, "elapsed_time": "0:46:09", "remaining_time": "0:09:41", "throughput": 5566.16, "total_tokens": 15415056} +{"current_steps": 31320, "total_steps": 37885, "loss": 0.0, "lr": 1.7749151958427379e-07, "epoch": 4.133562095816286, "percentage": 82.67, "elapsed_time": "0:46:09", "remaining_time": "0:09:40", "throughput": 5566.37, "total_tokens": 15417488} +{"current_steps": 31325, "total_steps": 37885, "loss": 0.0, "lr": 1.77229583509647e-07, "epoch": 4.134221987594034, "percentage": 82.68, "elapsed_time": "0:46:10", "remaining_time": "0:09:40", "throughput": 5566.55, "total_tokens": 15419792} +{"current_steps": 31330, "total_steps": 37885, "loss": 0.0, "lr": 1.7696782206208306e-07, "epoch": 4.134881879371783, "percentage": 82.7, "elapsed_time": "0:46:10", "remaining_time": "0:09:39", "throughput": 5566.85, "total_tokens": 15422480} +{"current_steps": 31335, "total_steps": 37885, "loss": 0.0001, "lr": 1.767062352971389e-07, "epoch": 4.135541771149532, "percentage": 82.71, "elapsed_time": "0:46:10", "remaining_time": "0:09:39", "throughput": 5567.02, "total_tokens": 15424784} +{"current_steps": 31340, "total_steps": 37885, "loss": 0.0, "lr": 1.7644482327033484e-07, "epoch": 4.13620166292728, "percentage": 82.72, "elapsed_time": "0:46:11", "remaining_time": "0:09:38", "throughput": 5567.29, "total_tokens": 15427344} +{"current_steps": 31345, "total_steps": 37885, "loss": 0.0, "lr": 1.761835860371532e-07, "epoch": 4.136861554705028, "percentage": 82.74, "elapsed_time": "0:46:11", "remaining_time": "0:09:38", "throughput": 5567.61, "total_tokens": 15430096} +{"current_steps": 31350, "total_steps": 37885, "loss": 0.0, "lr": 1.759225236530394e-07, "epoch": 4.1375214464827765, "percentage": 82.75, "elapsed_time": "0:46:11", "remaining_time": "0:09:37", "throughput": 5567.91, "total_tokens": 15432784} +{"current_steps": 31355, "total_steps": 37885, "loss": 0.0, "lr": 1.756616361734029e-07, "epoch": 4.138181338260526, "percentage": 82.76, "elapsed_time": "0:46:12", "remaining_time": "0:09:37", "throughput": 5567.99, "total_tokens": 15434832} +{"current_steps": 31360, "total_steps": 37885, "loss": 0.0205, "lr": 1.754009236536146e-07, "epoch": 4.138841230038274, "percentage": 82.78, "elapsed_time": "0:46:12", "remaining_time": "0:09:36", "throughput": 5568.21, "total_tokens": 15437264} +{"current_steps": 31365, "total_steps": 37885, "loss": 0.0, "lr": 1.7514038614900905e-07, "epoch": 4.139501121816022, "percentage": 82.79, "elapsed_time": "0:46:12", "remaining_time": "0:09:36", "throughput": 5568.52, "total_tokens": 15439952} +{"current_steps": 31370, "total_steps": 37885, "loss": 0.0, "lr": 1.748800237148833e-07, "epoch": 4.1401610135937705, "percentage": 82.8, "elapsed_time": "0:46:13", "remaining_time": "0:09:35", "throughput": 5568.67, "total_tokens": 15442192} +{"current_steps": 31375, "total_steps": 37885, "loss": 0.0, "lr": 1.7461983640649736e-07, "epoch": 4.140820905371519, "percentage": 82.82, "elapsed_time": "0:46:13", "remaining_time": "0:09:35", "throughput": 5568.86, "total_tokens": 15444560} +{"current_steps": 31380, "total_steps": 37885, "loss": 0.0, "lr": 1.7435982427907446e-07, "epoch": 4.141480797149267, "percentage": 82.83, "elapsed_time": "0:46:13", "remaining_time": "0:09:34", "throughput": 5569.1, "total_tokens": 15447056} +{"current_steps": 31385, "total_steps": 37885, "loss": 0.0, "lr": 1.7409998738779962e-07, "epoch": 4.142140688927016, "percentage": 82.84, "elapsed_time": "0:46:14", "remaining_time": "0:09:34", "throughput": 5569.38, "total_tokens": 15449680} +{"current_steps": 31390, "total_steps": 37885, "loss": 0.0, "lr": 1.7384032578782216e-07, "epoch": 4.1428005807047645, "percentage": 82.86, "elapsed_time": "0:46:14", "remaining_time": "0:09:34", "throughput": 5569.57, "total_tokens": 15452048} +{"current_steps": 31395, "total_steps": 37885, "loss": 0.0, "lr": 1.7358083953425306e-07, "epoch": 4.143460472482513, "percentage": 82.87, "elapsed_time": "0:46:14", "remaining_time": "0:09:33", "throughput": 5569.87, "total_tokens": 15454736} +{"current_steps": 31400, "total_steps": 37885, "loss": 0.0001, "lr": 1.7332152868216598e-07, "epoch": 4.144120364260261, "percentage": 82.88, "elapsed_time": "0:46:15", "remaining_time": "0:09:33", "throughput": 5570.11, "total_tokens": 15457232} +{"current_steps": 31405, "total_steps": 37885, "loss": 0.0, "lr": 1.7306239328659822e-07, "epoch": 4.144780256038009, "percentage": 82.9, "elapsed_time": "0:46:15", "remaining_time": "0:09:32", "throughput": 5570.34, "total_tokens": 15459728} +{"current_steps": 31410, "total_steps": 37885, "loss": 0.0, "lr": 1.728034334025491e-07, "epoch": 4.1454401478157585, "percentage": 82.91, "elapsed_time": "0:46:15", "remaining_time": "0:09:32", "throughput": 5570.54, "total_tokens": 15462096} +{"current_steps": 31415, "total_steps": 37885, "loss": 0.0511, "lr": 1.7254464908498156e-07, "epoch": 4.146100039593507, "percentage": 82.92, "elapsed_time": "0:46:16", "remaining_time": "0:09:31", "throughput": 5570.82, "total_tokens": 15464720} +{"current_steps": 31420, "total_steps": 37885, "loss": 0.0003, "lr": 1.7228604038882e-07, "epoch": 4.146759931371255, "percentage": 82.94, "elapsed_time": "0:46:16", "remaining_time": "0:09:31", "throughput": 5571.0, "total_tokens": 15467024} +{"current_steps": 31425, "total_steps": 37885, "loss": 0.0, "lr": 1.720276073689525e-07, "epoch": 4.147419823149003, "percentage": 82.95, "elapsed_time": "0:46:16", "remaining_time": "0:09:30", "throughput": 5571.23, "total_tokens": 15469520} +{"current_steps": 31430, "total_steps": 37885, "loss": 0.0411, "lr": 1.7176935008022986e-07, "epoch": 4.148079714926752, "percentage": 82.96, "elapsed_time": "0:46:17", "remaining_time": "0:09:30", "throughput": 5571.4, "total_tokens": 15471824} +{"current_steps": 31435, "total_steps": 37885, "loss": 0.0001, "lr": 1.715112685774649e-07, "epoch": 4.148739606704501, "percentage": 82.97, "elapsed_time": "0:46:17", "remaining_time": "0:09:29", "throughput": 5571.53, "total_tokens": 15474000} +{"current_steps": 31440, "total_steps": 37885, "loss": 0.0, "lr": 1.7125336291543368e-07, "epoch": 4.149399498482249, "percentage": 82.99, "elapsed_time": "0:46:17", "remaining_time": "0:09:29", "throughput": 5571.78, "total_tokens": 15476560} +{"current_steps": 31445, "total_steps": 37885, "loss": 0.0426, "lr": 1.7099563314887498e-07, "epoch": 4.150059390259997, "percentage": 83.0, "elapsed_time": "0:46:17", "remaining_time": "0:09:28", "throughput": 5571.91, "total_tokens": 15478736} +{"current_steps": 31450, "total_steps": 37885, "loss": 0.0, "lr": 1.7073807933249008e-07, "epoch": 4.150719282037746, "percentage": 83.01, "elapsed_time": "0:46:18", "remaining_time": "0:09:28", "throughput": 5572.05, "total_tokens": 15480976} +{"current_steps": 31455, "total_steps": 37885, "loss": 0.0595, "lr": 1.7048070152094263e-07, "epoch": 4.151379173815494, "percentage": 83.03, "elapsed_time": "0:46:18", "remaining_time": "0:09:28", "throughput": 5572.3, "total_tokens": 15483536} +{"current_steps": 31460, "total_steps": 37885, "loss": 0.0001, "lr": 1.7022349976885941e-07, "epoch": 4.152039065593243, "percentage": 83.04, "elapsed_time": "0:46:18", "remaining_time": "0:09:27", "throughput": 5572.53, "total_tokens": 15486032} +{"current_steps": 31465, "total_steps": 37885, "loss": 0.0519, "lr": 1.6996647413082977e-07, "epoch": 4.152698957370991, "percentage": 83.05, "elapsed_time": "0:46:19", "remaining_time": "0:09:27", "throughput": 5572.88, "total_tokens": 15488912} +{"current_steps": 31470, "total_steps": 37885, "loss": 0.0, "lr": 1.6970962466140514e-07, "epoch": 4.15335884914874, "percentage": 83.07, "elapsed_time": "0:46:19", "remaining_time": "0:09:26", "throughput": 5573.1, "total_tokens": 15491408} +{"current_steps": 31475, "total_steps": 37885, "loss": 0.0, "lr": 1.6945295141510018e-07, "epoch": 4.154018740926488, "percentage": 83.08, "elapsed_time": "0:46:20", "remaining_time": "0:09:26", "throughput": 5573.29, "total_tokens": 15493776} +{"current_steps": 31480, "total_steps": 37885, "loss": 0.0, "lr": 1.691964544463922e-07, "epoch": 4.154678632704236, "percentage": 83.09, "elapsed_time": "0:46:20", "remaining_time": "0:09:25", "throughput": 5573.52, "total_tokens": 15496272} +{"current_steps": 31485, "total_steps": 37885, "loss": 0.0, "lr": 1.6894013380972028e-07, "epoch": 4.155338524481985, "percentage": 83.11, "elapsed_time": "0:46:20", "remaining_time": "0:09:25", "throughput": 5573.66, "total_tokens": 15498512} +{"current_steps": 31490, "total_steps": 37885, "loss": 0.0, "lr": 1.6868398955948693e-07, "epoch": 4.155998416259734, "percentage": 83.12, "elapsed_time": "0:46:21", "remaining_time": "0:09:24", "throughput": 5573.89, "total_tokens": 15501008} +{"current_steps": 31495, "total_steps": 37885, "loss": 0.0, "lr": 1.684280217500569e-07, "epoch": 4.156658308037482, "percentage": 83.13, "elapsed_time": "0:46:21", "remaining_time": "0:09:24", "throughput": 5574.05, "total_tokens": 15503312} +{"current_steps": 31500, "total_steps": 37885, "loss": 0.0005, "lr": 1.6817223043575768e-07, "epoch": 4.15731819981523, "percentage": 83.15, "elapsed_time": "0:46:21", "remaining_time": "0:09:23", "throughput": 5574.34, "total_tokens": 15506000} +{"current_steps": 31505, "total_steps": 37885, "loss": 0.0253, "lr": 1.6791661567087888e-07, "epoch": 4.1579780915929785, "percentage": 83.16, "elapsed_time": "0:46:22", "remaining_time": "0:09:23", "throughput": 5574.65, "total_tokens": 15508752} +{"current_steps": 31510, "total_steps": 37885, "loss": 0.0, "lr": 1.6766117750967244e-07, "epoch": 4.158637983370728, "percentage": 83.17, "elapsed_time": "0:46:22", "remaining_time": "0:09:22", "throughput": 5574.94, "total_tokens": 15511440} +{"current_steps": 31515, "total_steps": 37885, "loss": 0.0, "lr": 1.6740591600635433e-07, "epoch": 4.159297875148476, "percentage": 83.19, "elapsed_time": "0:46:22", "remaining_time": "0:09:22", "throughput": 5575.13, "total_tokens": 15513808} +{"current_steps": 31520, "total_steps": 37885, "loss": 0.0, "lr": 1.671508312151011e-07, "epoch": 4.159957766926224, "percentage": 83.2, "elapsed_time": "0:46:23", "remaining_time": "0:09:21", "throughput": 5575.42, "total_tokens": 15516496} +{"current_steps": 31525, "total_steps": 37885, "loss": 0.0645, "lr": 1.6689592319005296e-07, "epoch": 4.1606176587039725, "percentage": 83.21, "elapsed_time": "0:46:23", "remaining_time": "0:09:21", "throughput": 5575.66, "total_tokens": 15519056} +{"current_steps": 31530, "total_steps": 37885, "loss": 0.0001, "lr": 1.6664119198531245e-07, "epoch": 4.161277550481721, "percentage": 83.23, "elapsed_time": "0:46:23", "remaining_time": "0:09:21", "throughput": 5575.73, "total_tokens": 15521104} +{"current_steps": 31535, "total_steps": 37885, "loss": 0.0294, "lr": 1.6638663765494398e-07, "epoch": 4.161937442259469, "percentage": 83.24, "elapsed_time": "0:46:24", "remaining_time": "0:09:20", "throughput": 5575.88, "total_tokens": 15523344} +{"current_steps": 31540, "total_steps": 37885, "loss": 0.0, "lr": 1.6613226025297545e-07, "epoch": 4.162597334037218, "percentage": 83.25, "elapsed_time": "0:46:24", "remaining_time": "0:09:20", "throughput": 5576.11, "total_tokens": 15525840} +{"current_steps": 31545, "total_steps": 37885, "loss": 0.0, "lr": 1.6587805983339564e-07, "epoch": 4.1632572258149665, "percentage": 83.27, "elapsed_time": "0:46:24", "remaining_time": "0:09:19", "throughput": 5576.27, "total_tokens": 15528144} +{"current_steps": 31550, "total_steps": 37885, "loss": 0.0, "lr": 1.65624036450158e-07, "epoch": 4.163917117592715, "percentage": 83.28, "elapsed_time": "0:46:25", "remaining_time": "0:09:19", "throughput": 5576.46, "total_tokens": 15530512} +{"current_steps": 31555, "total_steps": 37885, "loss": 0.0, "lr": 1.6537019015717647e-07, "epoch": 4.164577009370463, "percentage": 83.29, "elapsed_time": "0:46:25", "remaining_time": "0:09:18", "throughput": 5576.64, "total_tokens": 15532880} +{"current_steps": 31560, "total_steps": 37885, "loss": 0.0, "lr": 1.6511652100832797e-07, "epoch": 4.165236901148211, "percentage": 83.3, "elapsed_time": "0:46:25", "remaining_time": "0:09:18", "throughput": 5576.89, "total_tokens": 15535440} +{"current_steps": 31565, "total_steps": 37885, "loss": 0.0, "lr": 1.648630290574522e-07, "epoch": 4.1658967929259605, "percentage": 83.32, "elapsed_time": "0:46:26", "remaining_time": "0:09:17", "throughput": 5577.13, "total_tokens": 15538000} +{"current_steps": 31570, "total_steps": 37885, "loss": 0.02, "lr": 1.646097143583508e-07, "epoch": 4.166556684703709, "percentage": 83.33, "elapsed_time": "0:46:26", "remaining_time": "0:09:17", "throughput": 5577.42, "total_tokens": 15540688} +{"current_steps": 31575, "total_steps": 37885, "loss": 0.0252, "lr": 1.6435657696478844e-07, "epoch": 4.167216576481457, "percentage": 83.34, "elapsed_time": "0:46:26", "remaining_time": "0:09:16", "throughput": 5577.61, "total_tokens": 15543120} +{"current_steps": 31580, "total_steps": 37885, "loss": 0.0112, "lr": 1.6410361693049114e-07, "epoch": 4.167876468259205, "percentage": 83.36, "elapsed_time": "0:46:27", "remaining_time": "0:09:16", "throughput": 5577.69, "total_tokens": 15545232} +{"current_steps": 31585, "total_steps": 37885, "loss": 0.0, "lr": 1.6385083430914792e-07, "epoch": 4.168536360036954, "percentage": 83.37, "elapsed_time": "0:46:27", "remaining_time": "0:09:15", "throughput": 5577.97, "total_tokens": 15547920} +{"current_steps": 31590, "total_steps": 37885, "loss": 0.0456, "lr": 1.6359822915441058e-07, "epoch": 4.169196251814703, "percentage": 83.38, "elapsed_time": "0:46:27", "remaining_time": "0:09:15", "throughput": 5578.13, "total_tokens": 15550224} +{"current_steps": 31595, "total_steps": 37885, "loss": 0.0, "lr": 1.6334580151989207e-07, "epoch": 4.169856143592451, "percentage": 83.4, "elapsed_time": "0:46:28", "remaining_time": "0:09:15", "throughput": 5578.32, "total_tokens": 15552656} +{"current_steps": 31600, "total_steps": 37885, "loss": 0.0, "lr": 1.630935514591686e-07, "epoch": 4.170516035370199, "percentage": 83.41, "elapsed_time": "0:46:28", "remaining_time": "0:09:14", "throughput": 5578.58, "total_tokens": 15555280} +{"current_steps": 31605, "total_steps": 37885, "loss": 0.0, "lr": 1.6284147902577872e-07, "epoch": 4.171175927147948, "percentage": 83.42, "elapsed_time": "0:46:28", "remaining_time": "0:09:14", "throughput": 5578.79, "total_tokens": 15557776} +{"current_steps": 31610, "total_steps": 37885, "loss": 0.0001, "lr": 1.6258958427322234e-07, "epoch": 4.171835818925696, "percentage": 83.44, "elapsed_time": "0:46:29", "remaining_time": "0:09:13", "throughput": 5578.99, "total_tokens": 15560208} +{"current_steps": 31615, "total_steps": 37885, "loss": 0.0002, "lr": 1.623378672549628e-07, "epoch": 4.172495710703445, "percentage": 83.45, "elapsed_time": "0:46:29", "remaining_time": "0:09:13", "throughput": 5579.23, "total_tokens": 15562768} +{"current_steps": 31620, "total_steps": 37885, "loss": 0.0, "lr": 1.620863280244249e-07, "epoch": 4.173155602481193, "percentage": 83.46, "elapsed_time": "0:46:29", "remaining_time": "0:09:12", "throughput": 5579.47, "total_tokens": 15565328} +{"current_steps": 31625, "total_steps": 37885, "loss": 0.0005, "lr": 1.6183496663499652e-07, "epoch": 4.173815494258942, "percentage": 83.48, "elapsed_time": "0:46:30", "remaining_time": "0:09:12", "throughput": 5579.62, "total_tokens": 15567632} +{"current_steps": 31630, "total_steps": 37885, "loss": 0.0, "lr": 1.6158378314002673e-07, "epoch": 4.17447538603669, "percentage": 83.49, "elapsed_time": "0:46:30", "remaining_time": "0:09:11", "throughput": 5579.82, "total_tokens": 15570064} +{"current_steps": 31635, "total_steps": 37885, "loss": 0.0, "lr": 1.613327775928276e-07, "epoch": 4.175135277814438, "percentage": 83.5, "elapsed_time": "0:46:30", "remaining_time": "0:09:11", "throughput": 5580.05, "total_tokens": 15572624} +{"current_steps": 31640, "total_steps": 37885, "loss": 0.0, "lr": 1.6108195004667357e-07, "epoch": 4.175795169592186, "percentage": 83.52, "elapsed_time": "0:46:31", "remaining_time": "0:09:10", "throughput": 5580.12, "total_tokens": 15574672} +{"current_steps": 31645, "total_steps": 37885, "loss": 0.0002, "lr": 1.6083130055480033e-07, "epoch": 4.176455061369936, "percentage": 83.53, "elapsed_time": "0:46:31", "remaining_time": "0:09:10", "throughput": 5580.44, "total_tokens": 15577488} +{"current_steps": 31650, "total_steps": 37885, "loss": 0.0, "lr": 1.6058082917040682e-07, "epoch": 4.177114953147684, "percentage": 83.54, "elapsed_time": "0:46:31", "remaining_time": "0:09:09", "throughput": 5580.64, "total_tokens": 15579920} +{"current_steps": 31655, "total_steps": 37885, "loss": 0.0, "lr": 1.6033053594665402e-07, "epoch": 4.177774844925432, "percentage": 83.56, "elapsed_time": "0:46:32", "remaining_time": "0:09:09", "throughput": 5580.81, "total_tokens": 15582224} +{"current_steps": 31660, "total_steps": 37885, "loss": 0.0, "lr": 1.6008042093666428e-07, "epoch": 4.17843473670318, "percentage": 83.57, "elapsed_time": "0:46:32", "remaining_time": "0:09:09", "throughput": 5581.02, "total_tokens": 15584656} +{"current_steps": 31665, "total_steps": 37885, "loss": 0.0, "lr": 1.5983048419352297e-07, "epoch": 4.179094628480929, "percentage": 83.58, "elapsed_time": "0:46:32", "remaining_time": "0:09:08", "throughput": 5581.21, "total_tokens": 15587024} +{"current_steps": 31670, "total_steps": 37885, "loss": 0.0002, "lr": 1.5958072577027738e-07, "epoch": 4.179754520258678, "percentage": 83.6, "elapsed_time": "0:46:33", "remaining_time": "0:09:08", "throughput": 5581.48, "total_tokens": 15589648} +{"current_steps": 31675, "total_steps": 37885, "loss": 0.0, "lr": 1.5933114571993712e-07, "epoch": 4.180414412036426, "percentage": 83.61, "elapsed_time": "0:46:33", "remaining_time": "0:09:07", "throughput": 5581.81, "total_tokens": 15592464} +{"current_steps": 31680, "total_steps": 37885, "loss": 0.0381, "lr": 1.5908174409547347e-07, "epoch": 4.181074303814174, "percentage": 83.62, "elapsed_time": "0:46:33", "remaining_time": "0:09:07", "throughput": 5582.07, "total_tokens": 15595024} +{"current_steps": 31685, "total_steps": 37885, "loss": 0.0677, "lr": 1.588325209498198e-07, "epoch": 4.181734195591923, "percentage": 83.63, "elapsed_time": "0:46:34", "remaining_time": "0:09:06", "throughput": 5582.34, "total_tokens": 15597648} +{"current_steps": 31690, "total_steps": 37885, "loss": 0.0, "lr": 1.5858347633587277e-07, "epoch": 4.182394087369671, "percentage": 83.65, "elapsed_time": "0:46:34", "remaining_time": "0:09:06", "throughput": 5582.59, "total_tokens": 15600208} +{"current_steps": 31695, "total_steps": 37885, "loss": 0.0, "lr": 1.5833461030648954e-07, "epoch": 4.18305397914742, "percentage": 83.66, "elapsed_time": "0:46:34", "remaining_time": "0:09:05", "throughput": 5582.84, "total_tokens": 15602768} +{"current_steps": 31700, "total_steps": 37885, "loss": 0.0207, "lr": 1.5808592291449074e-07, "epoch": 4.183713870925168, "percentage": 83.67, "elapsed_time": "0:46:35", "remaining_time": "0:09:05", "throughput": 5583.14, "total_tokens": 15605456} +{"current_steps": 31705, "total_steps": 37885, "loss": 0.0003, "lr": 1.5783741421265784e-07, "epoch": 4.184373762702917, "percentage": 83.69, "elapsed_time": "0:46:35", "remaining_time": "0:09:04", "throughput": 5583.4, "total_tokens": 15608016} +{"current_steps": 31710, "total_steps": 37885, "loss": 0.0, "lr": 1.575890842537353e-07, "epoch": 4.185033654480665, "percentage": 83.7, "elapsed_time": "0:46:35", "remaining_time": "0:09:04", "throughput": 5583.55, "total_tokens": 15610256} +{"current_steps": 31715, "total_steps": 37885, "loss": 0.0, "lr": 1.573409330904296e-07, "epoch": 4.185693546258413, "percentage": 83.71, "elapsed_time": "0:46:36", "remaining_time": "0:09:03", "throughput": 5583.76, "total_tokens": 15612688} +{"current_steps": 31720, "total_steps": 37885, "loss": 0.0579, "lr": 1.5709296077540835e-07, "epoch": 4.1863534380361624, "percentage": 83.73, "elapsed_time": "0:46:36", "remaining_time": "0:09:03", "throughput": 5584.06, "total_tokens": 15615376} +{"current_steps": 31725, "total_steps": 37885, "loss": 0.0441, "lr": 1.5684516736130283e-07, "epoch": 4.187013329813911, "percentage": 83.74, "elapsed_time": "0:46:36", "remaining_time": "0:09:03", "throughput": 5584.22, "total_tokens": 15617680} +{"current_steps": 31730, "total_steps": 37885, "loss": 0.0, "lr": 1.5659755290070453e-07, "epoch": 4.187673221591659, "percentage": 83.75, "elapsed_time": "0:46:37", "remaining_time": "0:09:02", "throughput": 5584.54, "total_tokens": 15620432} +{"current_steps": 31735, "total_steps": 37885, "loss": 0.0, "lr": 1.5635011744616854e-07, "epoch": 4.188333113369407, "percentage": 83.77, "elapsed_time": "0:46:37", "remaining_time": "0:09:02", "throughput": 5584.71, "total_tokens": 15622736} +{"current_steps": 31740, "total_steps": 37885, "loss": 0.0, "lr": 1.5610286105021063e-07, "epoch": 4.188993005147156, "percentage": 83.78, "elapsed_time": "0:46:37", "remaining_time": "0:09:01", "throughput": 5585.0, "total_tokens": 15625424} +{"current_steps": 31745, "total_steps": 37885, "loss": 0.0003, "lr": 1.5585578376530938e-07, "epoch": 4.189652896924905, "percentage": 83.79, "elapsed_time": "0:46:38", "remaining_time": "0:09:01", "throughput": 5585.23, "total_tokens": 15627920} +{"current_steps": 31750, "total_steps": 37885, "loss": 0.0, "lr": 1.556088856439055e-07, "epoch": 4.190312788702653, "percentage": 83.81, "elapsed_time": "0:46:38", "remaining_time": "0:09:00", "throughput": 5585.44, "total_tokens": 15630352} +{"current_steps": 31755, "total_steps": 37885, "loss": 0.0518, "lr": 1.5536216673840084e-07, "epoch": 4.190972680480401, "percentage": 83.82, "elapsed_time": "0:46:38", "remaining_time": "0:09:00", "throughput": 5585.68, "total_tokens": 15632848} +{"current_steps": 31760, "total_steps": 37885, "loss": 0.0, "lr": 1.551156271011599e-07, "epoch": 4.19163257225815, "percentage": 83.83, "elapsed_time": "0:46:39", "remaining_time": "0:08:59", "throughput": 5585.91, "total_tokens": 15635344} +{"current_steps": 31765, "total_steps": 37885, "loss": 0.0, "lr": 1.5486926678450907e-07, "epoch": 4.192292464035898, "percentage": 83.85, "elapsed_time": "0:46:39", "remaining_time": "0:08:59", "throughput": 5586.14, "total_tokens": 15637840} +{"current_steps": 31770, "total_steps": 37885, "loss": 0.0, "lr": 1.5462308584073625e-07, "epoch": 4.192952355813647, "percentage": 83.86, "elapsed_time": "0:46:39", "remaining_time": "0:08:58", "throughput": 5586.35, "total_tokens": 15640272} +{"current_steps": 31775, "total_steps": 37885, "loss": 0.0, "lr": 1.5437708432209174e-07, "epoch": 4.193612247591395, "percentage": 83.87, "elapsed_time": "0:46:40", "remaining_time": "0:08:58", "throughput": 5586.6, "total_tokens": 15642832} +{"current_steps": 31780, "total_steps": 37885, "loss": 0.0, "lr": 1.5413126228078755e-07, "epoch": 4.194272139369144, "percentage": 83.89, "elapsed_time": "0:46:40", "remaining_time": "0:08:57", "throughput": 5586.78, "total_tokens": 15645136} +{"current_steps": 31785, "total_steps": 37885, "loss": 0.0, "lr": 1.5388561976899784e-07, "epoch": 4.194932031146892, "percentage": 83.9, "elapsed_time": "0:46:40", "remaining_time": "0:08:57", "throughput": 5586.92, "total_tokens": 15647376} +{"current_steps": 31790, "total_steps": 37885, "loss": 0.0, "lr": 1.53640156838858e-07, "epoch": 4.19559192292464, "percentage": 83.91, "elapsed_time": "0:46:41", "remaining_time": "0:08:57", "throughput": 5587.07, "total_tokens": 15649616} +{"current_steps": 31795, "total_steps": 37885, "loss": 0.0, "lr": 1.5339487354246605e-07, "epoch": 4.196251814702388, "percentage": 83.93, "elapsed_time": "0:46:41", "remaining_time": "0:08:56", "throughput": 5587.28, "total_tokens": 15652048} +{"current_steps": 31800, "total_steps": 37885, "loss": 0.001, "lr": 1.5314976993188177e-07, "epoch": 4.196911706480138, "percentage": 83.94, "elapsed_time": "0:46:41", "remaining_time": "0:08:56", "throughput": 5587.43, "total_tokens": 15654288} +{"current_steps": 31805, "total_steps": 37885, "loss": 0.0, "lr": 1.5290484605912624e-07, "epoch": 4.197571598257886, "percentage": 83.95, "elapsed_time": "0:46:42", "remaining_time": "0:08:55", "throughput": 5587.66, "total_tokens": 15656784} +{"current_steps": 31810, "total_steps": 37885, "loss": 0.0, "lr": 1.5266010197618296e-07, "epoch": 4.198231490035634, "percentage": 83.96, "elapsed_time": "0:46:42", "remaining_time": "0:08:55", "throughput": 5587.97, "total_tokens": 15659536} +{"current_steps": 31815, "total_steps": 37885, "loss": 0.001, "lr": 1.5241553773499727e-07, "epoch": 4.198891381813382, "percentage": 83.98, "elapsed_time": "0:46:42", "remaining_time": "0:08:54", "throughput": 5588.12, "total_tokens": 15661776} +{"current_steps": 31820, "total_steps": 37885, "loss": 0.0, "lr": 1.5217115338747577e-07, "epoch": 4.199551273591131, "percentage": 83.99, "elapsed_time": "0:46:43", "remaining_time": "0:08:54", "throughput": 5588.33, "total_tokens": 15664208} +{"current_steps": 31825, "total_steps": 37885, "loss": 0.0132, "lr": 1.5192694898548742e-07, "epoch": 4.20021116536888, "percentage": 84.0, "elapsed_time": "0:46:43", "remaining_time": "0:08:53", "throughput": 5588.51, "total_tokens": 15666576} +{"current_steps": 31830, "total_steps": 37885, "loss": 0.0, "lr": 1.5168292458086286e-07, "epoch": 4.200871057146628, "percentage": 84.02, "elapsed_time": "0:46:43", "remaining_time": "0:08:53", "throughput": 5588.68, "total_tokens": 15668880} +{"current_steps": 31835, "total_steps": 37885, "loss": 0.028, "lr": 1.5143908022539487e-07, "epoch": 4.201530948924376, "percentage": 84.03, "elapsed_time": "0:46:44", "remaining_time": "0:08:52", "throughput": 5588.83, "total_tokens": 15671120} +{"current_steps": 31840, "total_steps": 37885, "loss": 0.0001, "lr": 1.5119541597083718e-07, "epoch": 4.202190840702125, "percentage": 84.04, "elapsed_time": "0:46:44", "remaining_time": "0:08:52", "throughput": 5588.99, "total_tokens": 15673424} +{"current_steps": 31845, "total_steps": 37885, "loss": 0.0, "lr": 1.5095193186890554e-07, "epoch": 4.202850732479873, "percentage": 84.06, "elapsed_time": "0:46:44", "remaining_time": "0:08:51", "throughput": 5589.29, "total_tokens": 15676112} +{"current_steps": 31850, "total_steps": 37885, "loss": 0.0006, "lr": 1.5070862797127847e-07, "epoch": 4.203510624257622, "percentage": 84.07, "elapsed_time": "0:46:45", "remaining_time": "0:08:51", "throughput": 5589.52, "total_tokens": 15678608} +{"current_steps": 31855, "total_steps": 37885, "loss": 0.0074, "lr": 1.504655043295948e-07, "epoch": 4.20417051603537, "percentage": 84.08, "elapsed_time": "0:46:45", "remaining_time": "0:08:51", "throughput": 5589.7, "total_tokens": 15680976} +{"current_steps": 31860, "total_steps": 37885, "loss": 0.0, "lr": 1.5022256099545594e-07, "epoch": 4.204830407813119, "percentage": 84.1, "elapsed_time": "0:46:45", "remaining_time": "0:08:50", "throughput": 5589.88, "total_tokens": 15683280} +{"current_steps": 31865, "total_steps": 37885, "loss": 0.0, "lr": 1.4997979802042515e-07, "epoch": 4.205490299590867, "percentage": 84.11, "elapsed_time": "0:46:45", "remaining_time": "0:08:50", "throughput": 5590.06, "total_tokens": 15685648} +{"current_steps": 31870, "total_steps": 37885, "loss": 0.0, "lr": 1.4973721545602668e-07, "epoch": 4.206150191368615, "percentage": 84.12, "elapsed_time": "0:46:46", "remaining_time": "0:08:49", "throughput": 5590.34, "total_tokens": 15688272} +{"current_steps": 31875, "total_steps": 37885, "loss": 0.0001, "lr": 1.4949481335374736e-07, "epoch": 4.206810083146364, "percentage": 84.14, "elapsed_time": "0:46:46", "remaining_time": "0:08:49", "throughput": 5590.56, "total_tokens": 15690768} +{"current_steps": 31880, "total_steps": 37885, "loss": 0.0, "lr": 1.4925259176503446e-07, "epoch": 4.207469974924113, "percentage": 84.15, "elapsed_time": "0:46:46", "remaining_time": "0:08:48", "throughput": 5590.85, "total_tokens": 15693456} +{"current_steps": 31885, "total_steps": 37885, "loss": 0.0, "lr": 1.4901055074129888e-07, "epoch": 4.208129866701861, "percentage": 84.16, "elapsed_time": "0:46:47", "remaining_time": "0:08:48", "throughput": 5591.06, "total_tokens": 15695888} +{"current_steps": 31890, "total_steps": 37885, "loss": 0.0, "lr": 1.487686903339115e-07, "epoch": 4.208789758479609, "percentage": 84.18, "elapsed_time": "0:46:47", "remaining_time": "0:08:47", "throughput": 5591.19, "total_tokens": 15698064} +{"current_steps": 31895, "total_steps": 37885, "loss": 0.0, "lr": 1.4852701059420526e-07, "epoch": 4.2094496502573575, "percentage": 84.19, "elapsed_time": "0:46:47", "remaining_time": "0:08:47", "throughput": 5591.36, "total_tokens": 15700368} +{"current_steps": 31900, "total_steps": 37885, "loss": 0.0, "lr": 1.4828551157347514e-07, "epoch": 4.210109542035106, "percentage": 84.2, "elapsed_time": "0:46:48", "remaining_time": "0:08:46", "throughput": 5591.59, "total_tokens": 15702864} +{"current_steps": 31905, "total_steps": 37885, "loss": 0.0, "lr": 1.4804419332297746e-07, "epoch": 4.210769433812855, "percentage": 84.22, "elapsed_time": "0:46:48", "remaining_time": "0:08:46", "throughput": 5591.74, "total_tokens": 15705104} +{"current_steps": 31910, "total_steps": 37885, "loss": 0.0, "lr": 1.478030558939307e-07, "epoch": 4.211429325590603, "percentage": 84.23, "elapsed_time": "0:46:48", "remaining_time": "0:08:45", "throughput": 5591.89, "total_tokens": 15707344} +{"current_steps": 31915, "total_steps": 37885, "loss": 0.0, "lr": 1.4756209933751396e-07, "epoch": 4.2120892173683515, "percentage": 84.24, "elapsed_time": "0:46:49", "remaining_time": "0:08:45", "throughput": 5592.14, "total_tokens": 15709904} +{"current_steps": 31920, "total_steps": 37885, "loss": 0.0, "lr": 1.4732132370486872e-07, "epoch": 4.2127491091461, "percentage": 84.25, "elapsed_time": "0:46:49", "remaining_time": "0:08:45", "throughput": 5592.33, "total_tokens": 15712272} +{"current_steps": 31925, "total_steps": 37885, "loss": 0.0, "lr": 1.4708072904709812e-07, "epoch": 4.213409000923848, "percentage": 84.27, "elapsed_time": "0:46:49", "remaining_time": "0:08:44", "throughput": 5592.6, "total_tokens": 15714896} +{"current_steps": 31930, "total_steps": 37885, "loss": 0.0011, "lr": 1.468403154152663e-07, "epoch": 4.214068892701597, "percentage": 84.28, "elapsed_time": "0:46:50", "remaining_time": "0:08:44", "throughput": 5592.85, "total_tokens": 15717456} +{"current_steps": 31935, "total_steps": 37885, "loss": 0.0113, "lr": 1.4660008286039937e-07, "epoch": 4.2147287844793455, "percentage": 84.29, "elapsed_time": "0:46:50", "remaining_time": "0:08:43", "throughput": 5593.1, "total_tokens": 15720016} +{"current_steps": 31940, "total_steps": 37885, "loss": 0.0, "lr": 1.4636003143348518e-07, "epoch": 4.215388676257094, "percentage": 84.31, "elapsed_time": "0:46:50", "remaining_time": "0:08:43", "throughput": 5593.27, "total_tokens": 15722320} +{"current_steps": 31945, "total_steps": 37885, "loss": 0.0, "lr": 1.4612016118547265e-07, "epoch": 4.216048568034842, "percentage": 84.32, "elapsed_time": "0:46:51", "remaining_time": "0:08:42", "throughput": 5593.51, "total_tokens": 15724816} +{"current_steps": 31950, "total_steps": 37885, "loss": 0.0396, "lr": 1.4588047216727251e-07, "epoch": 4.21670845981259, "percentage": 84.33, "elapsed_time": "0:46:51", "remaining_time": "0:08:42", "throughput": 5593.79, "total_tokens": 15727440} +{"current_steps": 31955, "total_steps": 37885, "loss": 0.0, "lr": 1.4564096442975715e-07, "epoch": 4.2173683515903395, "percentage": 84.35, "elapsed_time": "0:46:51", "remaining_time": "0:08:41", "throughput": 5593.96, "total_tokens": 15729744} +{"current_steps": 31960, "total_steps": 37885, "loss": 0.0, "lr": 1.454016380237605e-07, "epoch": 4.218028243368088, "percentage": 84.36, "elapsed_time": "0:46:52", "remaining_time": "0:08:41", "throughput": 5594.21, "total_tokens": 15732304} +{"current_steps": 31965, "total_steps": 37885, "loss": 0.0, "lr": 1.4516249300007743e-07, "epoch": 4.218688135145836, "percentage": 84.37, "elapsed_time": "0:46:52", "remaining_time": "0:08:40", "throughput": 5594.38, "total_tokens": 15734608} +{"current_steps": 31970, "total_steps": 37885, "loss": 0.0, "lr": 1.4492352940946506e-07, "epoch": 4.219348026923584, "percentage": 84.39, "elapsed_time": "0:46:52", "remaining_time": "0:08:40", "throughput": 5594.57, "total_tokens": 15736976} +{"current_steps": 31975, "total_steps": 37885, "loss": 0.0019, "lr": 1.4468474730264168e-07, "epoch": 4.220007918701333, "percentage": 84.4, "elapsed_time": "0:46:53", "remaining_time": "0:08:39", "throughput": 5594.86, "total_tokens": 15739664} +{"current_steps": 31980, "total_steps": 37885, "loss": 0.0, "lr": 1.4444614673028687e-07, "epoch": 4.220667810479082, "percentage": 84.41, "elapsed_time": "0:46:53", "remaining_time": "0:08:39", "throughput": 5595.07, "total_tokens": 15742096} +{"current_steps": 31985, "total_steps": 37885, "loss": 0.0, "lr": 1.442077277430419e-07, "epoch": 4.22132770225683, "percentage": 84.43, "elapsed_time": "0:46:53", "remaining_time": "0:08:39", "throughput": 5595.26, "total_tokens": 15744464} +{"current_steps": 31990, "total_steps": 37885, "loss": 0.0, "lr": 1.4396949039150984e-07, "epoch": 4.221987594034578, "percentage": 84.44, "elapsed_time": "0:46:54", "remaining_time": "0:08:38", "throughput": 5595.46, "total_tokens": 15746896} +{"current_steps": 31995, "total_steps": 37885, "loss": 0.0, "lr": 1.4373143472625438e-07, "epoch": 4.222647485812327, "percentage": 84.45, "elapsed_time": "0:46:54", "remaining_time": "0:08:38", "throughput": 5595.63, "total_tokens": 15749200} +{"current_steps": 32000, "total_steps": 37885, "loss": 0.0, "lr": 1.4349356079780116e-07, "epoch": 4.223307377590075, "percentage": 84.47, "elapsed_time": "0:46:54", "remaining_time": "0:08:37", "throughput": 5595.86, "total_tokens": 15751696} +{"current_steps": 32005, "total_steps": 37885, "loss": 0.0308, "lr": 1.432558686566374e-07, "epoch": 4.223967269367824, "percentage": 84.48, "elapsed_time": "0:46:55", "remaining_time": "0:08:37", "throughput": 5596.11, "total_tokens": 15754256} +{"current_steps": 32010, "total_steps": 37885, "loss": 0.0323, "lr": 1.4301835835321175e-07, "epoch": 4.224627161145572, "percentage": 84.49, "elapsed_time": "0:46:55", "remaining_time": "0:08:36", "throughput": 5596.42, "total_tokens": 15757008} +{"current_steps": 32015, "total_steps": 37885, "loss": 0.0, "lr": 1.4278102993793362e-07, "epoch": 4.225287052923321, "percentage": 84.51, "elapsed_time": "0:46:55", "remaining_time": "0:08:36", "throughput": 5596.59, "total_tokens": 15759312} +{"current_steps": 32020, "total_steps": 37885, "loss": 0.0, "lr": 1.4254388346117408e-07, "epoch": 4.225946944701069, "percentage": 84.52, "elapsed_time": "0:46:56", "remaining_time": "0:08:35", "throughput": 5596.76, "total_tokens": 15761616} +{"current_steps": 32025, "total_steps": 37885, "loss": 0.0, "lr": 1.423069189732664e-07, "epoch": 4.226606836478817, "percentage": 84.53, "elapsed_time": "0:46:56", "remaining_time": "0:08:35", "throughput": 5597.01, "total_tokens": 15764176} +{"current_steps": 32030, "total_steps": 37885, "loss": 0.0042, "lr": 1.4207013652450405e-07, "epoch": 4.227266728256566, "percentage": 84.55, "elapsed_time": "0:46:56", "remaining_time": "0:08:34", "throughput": 5597.26, "total_tokens": 15766736} +{"current_steps": 32035, "total_steps": 37885, "loss": 0.0023, "lr": 1.4183353616514293e-07, "epoch": 4.227926620034315, "percentage": 84.56, "elapsed_time": "0:46:57", "remaining_time": "0:08:34", "throughput": 5597.55, "total_tokens": 15769424} +{"current_steps": 32040, "total_steps": 37885, "loss": 0.0, "lr": 1.415971179453991e-07, "epoch": 4.228586511812063, "percentage": 84.57, "elapsed_time": "0:46:57", "remaining_time": "0:08:33", "throughput": 5597.88, "total_tokens": 15772240} +{"current_steps": 32045, "total_steps": 37885, "loss": 0.0001, "lr": 1.4136088191545083e-07, "epoch": 4.229246403589811, "percentage": 84.58, "elapsed_time": "0:46:57", "remaining_time": "0:08:33", "throughput": 5598.07, "total_tokens": 15774608} +{"current_steps": 32050, "total_steps": 37885, "loss": 0.0, "lr": 1.411248281254379e-07, "epoch": 4.2299062953675595, "percentage": 84.6, "elapsed_time": "0:46:58", "remaining_time": "0:08:33", "throughput": 5598.29, "total_tokens": 15777040} +{"current_steps": 32055, "total_steps": 37885, "loss": 0.0, "lr": 1.408889566254603e-07, "epoch": 4.230566187145308, "percentage": 84.61, "elapsed_time": "0:46:58", "remaining_time": "0:08:32", "throughput": 5598.49, "total_tokens": 15779472} +{"current_steps": 32060, "total_steps": 37885, "loss": 0.0, "lr": 1.4065326746558092e-07, "epoch": 4.231226078923057, "percentage": 84.62, "elapsed_time": "0:46:58", "remaining_time": "0:08:32", "throughput": 5598.7, "total_tokens": 15781904} +{"current_steps": 32065, "total_steps": 37885, "loss": 0.0, "lr": 1.4041776069582233e-07, "epoch": 4.231885970700805, "percentage": 84.64, "elapsed_time": "0:46:59", "remaining_time": "0:08:31", "throughput": 5599.0, "total_tokens": 15784592} +{"current_steps": 32070, "total_steps": 37885, "loss": 0.0, "lr": 1.4018243636616967e-07, "epoch": 4.2325458624785535, "percentage": 84.65, "elapsed_time": "0:46:59", "remaining_time": "0:08:31", "throughput": 5599.2, "total_tokens": 15787024} +{"current_steps": 32075, "total_steps": 37885, "loss": 0.0, "lr": 1.399472945265684e-07, "epoch": 4.233205754256302, "percentage": 84.66, "elapsed_time": "0:46:59", "remaining_time": "0:08:30", "throughput": 5599.41, "total_tokens": 15789456} +{"current_steps": 32080, "total_steps": 37885, "loss": 0.0176, "lr": 1.397123352269257e-07, "epoch": 4.23386564603405, "percentage": 84.68, "elapsed_time": "0:47:00", "remaining_time": "0:08:30", "throughput": 5599.62, "total_tokens": 15791888} +{"current_steps": 32085, "total_steps": 37885, "loss": 0.0002, "lr": 1.3947755851711053e-07, "epoch": 4.234525537811799, "percentage": 84.69, "elapsed_time": "0:47:00", "remaining_time": "0:08:29", "throughput": 5599.77, "total_tokens": 15794128} +{"current_steps": 32090, "total_steps": 37885, "loss": 0.0, "lr": 1.3924296444695194e-07, "epoch": 4.2351854295895475, "percentage": 84.7, "elapsed_time": "0:47:00", "remaining_time": "0:08:29", "throughput": 5599.89, "total_tokens": 15796304} +{"current_steps": 32095, "total_steps": 37885, "loss": 0.0, "lr": 1.3900855306624093e-07, "epoch": 4.235845321367296, "percentage": 84.72, "elapsed_time": "0:47:01", "remaining_time": "0:08:28", "throughput": 5600.12, "total_tokens": 15798800} +{"current_steps": 32100, "total_steps": 37885, "loss": 0.0007, "lr": 1.387743244247299e-07, "epoch": 4.236505213145044, "percentage": 84.73, "elapsed_time": "0:47:01", "remaining_time": "0:08:28", "throughput": 5600.38, "total_tokens": 15801424} +{"current_steps": 32105, "total_steps": 37885, "loss": 0.0, "lr": 1.385402785721319e-07, "epoch": 4.237165104922792, "percentage": 84.74, "elapsed_time": "0:47:01", "remaining_time": "0:08:28", "throughput": 5600.71, "total_tokens": 15804240} +{"current_steps": 32110, "total_steps": 37885, "loss": 0.0, "lr": 1.3830641555812162e-07, "epoch": 4.2378249967005415, "percentage": 84.76, "elapsed_time": "0:47:02", "remaining_time": "0:08:27", "throughput": 5600.88, "total_tokens": 15806544} +{"current_steps": 32115, "total_steps": 37885, "loss": 0.0268, "lr": 1.3807273543233466e-07, "epoch": 4.23848488847829, "percentage": 84.77, "elapsed_time": "0:47:02", "remaining_time": "0:08:27", "throughput": 5601.27, "total_tokens": 15809552} +{"current_steps": 32120, "total_steps": 37885, "loss": 0.0001, "lr": 1.3783923824436817e-07, "epoch": 4.239144780256038, "percentage": 84.78, "elapsed_time": "0:47:02", "remaining_time": "0:08:26", "throughput": 5601.48, "total_tokens": 15811984} +{"current_steps": 32125, "total_steps": 37885, "loss": 0.0, "lr": 1.3760592404377991e-07, "epoch": 4.239804672033786, "percentage": 84.8, "elapsed_time": "0:47:03", "remaining_time": "0:08:26", "throughput": 5601.75, "total_tokens": 15814608} +{"current_steps": 32130, "total_steps": 37885, "loss": 0.1054, "lr": 1.373727928800894e-07, "epoch": 4.240464563811535, "percentage": 84.81, "elapsed_time": "0:47:03", "remaining_time": "0:08:25", "throughput": 5601.96, "total_tokens": 15817040} +{"current_steps": 32135, "total_steps": 37885, "loss": 0.0323, "lr": 1.3713984480277708e-07, "epoch": 4.241124455589284, "percentage": 84.82, "elapsed_time": "0:47:03", "remaining_time": "0:08:25", "throughput": 5602.21, "total_tokens": 15819600} +{"current_steps": 32140, "total_steps": 37885, "loss": 0.0, "lr": 1.3690707986128414e-07, "epoch": 4.241784347367032, "percentage": 84.84, "elapsed_time": "0:47:04", "remaining_time": "0:08:24", "throughput": 5602.6, "total_tokens": 15822608} +{"current_steps": 32145, "total_steps": 37885, "loss": 0.0, "lr": 1.3667449810501353e-07, "epoch": 4.24244423914478, "percentage": 84.85, "elapsed_time": "0:47:04", "remaining_time": "0:08:24", "throughput": 5602.91, "total_tokens": 15825360} +{"current_steps": 32150, "total_steps": 37885, "loss": 0.0, "lr": 1.3644209958332908e-07, "epoch": 4.243104130922529, "percentage": 84.86, "elapsed_time": "0:47:04", "remaining_time": "0:08:23", "throughput": 5603.12, "total_tokens": 15827792} +{"current_steps": 32155, "total_steps": 37885, "loss": 0.0253, "lr": 1.3620988434555546e-07, "epoch": 4.243764022700277, "percentage": 84.88, "elapsed_time": "0:47:05", "remaining_time": "0:08:23", "throughput": 5603.32, "total_tokens": 15830224} +{"current_steps": 32160, "total_steps": 37885, "loss": 0.0381, "lr": 1.3597785244097882e-07, "epoch": 4.244423914478026, "percentage": 84.89, "elapsed_time": "0:47:05", "remaining_time": "0:08:22", "throughput": 5603.54, "total_tokens": 15832720} +{"current_steps": 32165, "total_steps": 37885, "loss": 0.0, "lr": 1.3574600391884627e-07, "epoch": 4.245083806255774, "percentage": 84.9, "elapsed_time": "0:47:05", "remaining_time": "0:08:22", "throughput": 5603.75, "total_tokens": 15835152} +{"current_steps": 32170, "total_steps": 37885, "loss": 0.0, "lr": 1.3551433882836615e-07, "epoch": 4.245743698033523, "percentage": 84.91, "elapsed_time": "0:47:06", "remaining_time": "0:08:22", "throughput": 5603.98, "total_tokens": 15837648} +{"current_steps": 32175, "total_steps": 37885, "loss": 0.0, "lr": 1.3528285721870747e-07, "epoch": 4.246403589811271, "percentage": 84.93, "elapsed_time": "0:47:06", "remaining_time": "0:08:21", "throughput": 5604.13, "total_tokens": 15839888} +{"current_steps": 32180, "total_steps": 37885, "loss": 0.0, "lr": 1.3505155913900012e-07, "epoch": 4.247063481589019, "percentage": 84.94, "elapsed_time": "0:47:06", "remaining_time": "0:08:21", "throughput": 5604.44, "total_tokens": 15842640} +{"current_steps": 32185, "total_steps": 37885, "loss": 0.0411, "lr": 1.3482044463833632e-07, "epoch": 4.247723373366767, "percentage": 84.95, "elapsed_time": "0:47:07", "remaining_time": "0:08:20", "throughput": 5604.65, "total_tokens": 15845072} +{"current_steps": 32190, "total_steps": 37885, "loss": 0.0046, "lr": 1.3458951376576778e-07, "epoch": 4.248383265144517, "percentage": 84.97, "elapsed_time": "0:47:07", "remaining_time": "0:08:20", "throughput": 5604.85, "total_tokens": 15847504} +{"current_steps": 32195, "total_steps": 37885, "loss": 0.0, "lr": 1.343587665703082e-07, "epoch": 4.249043156922265, "percentage": 84.98, "elapsed_time": "0:47:07", "remaining_time": "0:08:19", "throughput": 5605.1, "total_tokens": 15850064} +{"current_steps": 32200, "total_steps": 37885, "loss": 0.0, "lr": 1.341282031009321e-07, "epoch": 4.249703048700013, "percentage": 84.99, "elapsed_time": "0:47:08", "remaining_time": "0:08:19", "throughput": 5605.39, "total_tokens": 15852752} +{"current_steps": 32205, "total_steps": 37885, "loss": 0.0442, "lr": 1.338978234065745e-07, "epoch": 4.250362940477761, "percentage": 85.01, "elapsed_time": "0:47:08", "remaining_time": "0:08:18", "throughput": 5605.56, "total_tokens": 15855056} +{"current_steps": 32210, "total_steps": 37885, "loss": 0.0143, "lr": 1.3366762753613236e-07, "epoch": 4.25102283225551, "percentage": 85.02, "elapsed_time": "0:47:08", "remaining_time": "0:08:18", "throughput": 5605.77, "total_tokens": 15857488} +{"current_steps": 32215, "total_steps": 37885, "loss": 0.0087, "lr": 1.3343761553846222e-07, "epoch": 4.251682724033259, "percentage": 85.03, "elapsed_time": "0:47:09", "remaining_time": "0:08:17", "throughput": 5605.98, "total_tokens": 15859920} +{"current_steps": 32215, "total_steps": 37885, "eval_loss": 0.24730534851551056, "epoch": 4.251682724033259, "percentage": 85.03, "elapsed_time": "0:47:16", "remaining_time": "0:08:19", "throughput": 5590.54, "total_tokens": 15859920} +{"current_steps": 32220, "total_steps": 37885, "loss": 0.0, "lr": 1.332077874623836e-07, "epoch": 4.252342615811007, "percentage": 85.05, "elapsed_time": "0:47:51", "remaining_time": "0:08:24", "throughput": 5523.6, "total_tokens": 15862480} +{"current_steps": 32225, "total_steps": 37885, "loss": 0.0577, "lr": 1.3297814335667523e-07, "epoch": 4.253002507588755, "percentage": 85.06, "elapsed_time": "0:47:52", "remaining_time": "0:08:24", "throughput": 5523.93, "total_tokens": 15865296} +{"current_steps": 32230, "total_steps": 37885, "loss": 0.0548, "lr": 1.3274868327007715e-07, "epoch": 4.253662399366504, "percentage": 85.07, "elapsed_time": "0:47:52", "remaining_time": "0:08:23", "throughput": 5524.1, "total_tokens": 15867600} +{"current_steps": 32235, "total_steps": 37885, "loss": 0.0122, "lr": 1.3251940725129108e-07, "epoch": 4.254322291144252, "percentage": 85.09, "elapsed_time": "0:47:52", "remaining_time": "0:08:23", "throughput": 5524.31, "total_tokens": 15870032} +{"current_steps": 32240, "total_steps": 37885, "loss": 0.0, "lr": 1.3229031534897882e-07, "epoch": 4.254982182922001, "percentage": 85.1, "elapsed_time": "0:47:53", "remaining_time": "0:08:23", "throughput": 5524.51, "total_tokens": 15872464} +{"current_steps": 32245, "total_steps": 37885, "loss": 0.0, "lr": 1.320614076117641e-07, "epoch": 4.255642074699749, "percentage": 85.11, "elapsed_time": "0:47:53", "remaining_time": "0:08:22", "throughput": 5524.67, "total_tokens": 15874768} +{"current_steps": 32250, "total_steps": 37885, "loss": 0.0, "lr": 1.318326840882301e-07, "epoch": 4.256301966477498, "percentage": 85.13, "elapsed_time": "0:47:53", "remaining_time": "0:08:22", "throughput": 5524.84, "total_tokens": 15877136} +{"current_steps": 32255, "total_steps": 37885, "loss": 0.0, "lr": 1.3160414482692217e-07, "epoch": 4.256961858255246, "percentage": 85.14, "elapsed_time": "0:47:54", "remaining_time": "0:08:21", "throughput": 5524.95, "total_tokens": 15879312} +{"current_steps": 32260, "total_steps": 37885, "loss": 0.0, "lr": 1.3137578987634635e-07, "epoch": 4.257621750032994, "percentage": 85.15, "elapsed_time": "0:47:54", "remaining_time": "0:08:21", "throughput": 5525.21, "total_tokens": 15881936} +{"current_steps": 32265, "total_steps": 37885, "loss": 0.0, "lr": 1.3114761928496875e-07, "epoch": 4.258281641810743, "percentage": 85.17, "elapsed_time": "0:47:54", "remaining_time": "0:08:20", "throughput": 5525.37, "total_tokens": 15884240} +{"current_steps": 32270, "total_steps": 37885, "loss": 0.001, "lr": 1.3091963310121734e-07, "epoch": 4.258941533588492, "percentage": 85.18, "elapsed_time": "0:47:55", "remaining_time": "0:08:20", "throughput": 5525.59, "total_tokens": 15886736} +{"current_steps": 32275, "total_steps": 37885, "loss": 0.0, "lr": 1.306918313734805e-07, "epoch": 4.25960142536624, "percentage": 85.19, "elapsed_time": "0:47:55", "remaining_time": "0:08:19", "throughput": 5525.72, "total_tokens": 15888976} +{"current_steps": 32280, "total_steps": 37885, "loss": 0.0001, "lr": 1.3046421415010732e-07, "epoch": 4.260261317143988, "percentage": 85.21, "elapsed_time": "0:47:55", "remaining_time": "0:08:19", "throughput": 5525.81, "total_tokens": 15891088} +{"current_steps": 32285, "total_steps": 37885, "loss": 0.0, "lr": 1.3023678147940797e-07, "epoch": 4.2609212089217365, "percentage": 85.22, "elapsed_time": "0:47:56", "remaining_time": "0:08:18", "throughput": 5526.07, "total_tokens": 15893712} +{"current_steps": 32290, "total_steps": 37885, "loss": 0.0213, "lr": 1.3000953340965336e-07, "epoch": 4.261581100699486, "percentage": 85.23, "elapsed_time": "0:47:56", "remaining_time": "0:08:18", "throughput": 5526.28, "total_tokens": 15896144} +{"current_steps": 32295, "total_steps": 37885, "loss": 0.0, "lr": 1.297824699890756e-07, "epoch": 4.262240992477234, "percentage": 85.24, "elapsed_time": "0:47:56", "remaining_time": "0:08:17", "throughput": 5526.5, "total_tokens": 15898640} +{"current_steps": 32300, "total_steps": 37885, "loss": 0.0, "lr": 1.2955559126586667e-07, "epoch": 4.262900884254982, "percentage": 85.26, "elapsed_time": "0:47:57", "remaining_time": "0:08:17", "throughput": 5526.67, "total_tokens": 15901008} +{"current_steps": 32305, "total_steps": 37885, "loss": 0.0, "lr": 1.293288972881803e-07, "epoch": 4.2635607760327305, "percentage": 85.27, "elapsed_time": "0:47:57", "remaining_time": "0:08:17", "throughput": 5526.95, "total_tokens": 15903696} +{"current_steps": 32310, "total_steps": 37885, "loss": 0.0, "lr": 1.2910238810413075e-07, "epoch": 4.264220667810479, "percentage": 85.28, "elapsed_time": "0:47:57", "remaining_time": "0:08:16", "throughput": 5527.14, "total_tokens": 15906128} +{"current_steps": 32315, "total_steps": 37885, "loss": 0.0, "lr": 1.2887606376179262e-07, "epoch": 4.264880559588228, "percentage": 85.3, "elapsed_time": "0:47:58", "remaining_time": "0:08:16", "throughput": 5527.35, "total_tokens": 15908624} +{"current_steps": 32320, "total_steps": 37885, "loss": 0.0001, "lr": 1.2864992430920164e-07, "epoch": 4.265540451365976, "percentage": 85.31, "elapsed_time": "0:47:58", "remaining_time": "0:08:15", "throughput": 5527.48, "total_tokens": 15910864} +{"current_steps": 32325, "total_steps": 37885, "loss": 0.0004, "lr": 1.2842396979435476e-07, "epoch": 4.2662003431437245, "percentage": 85.32, "elapsed_time": "0:47:58", "remaining_time": "0:08:15", "throughput": 5527.66, "total_tokens": 15913296} +{"current_steps": 32330, "total_steps": 37885, "loss": 0.0, "lr": 1.2819820026520856e-07, "epoch": 4.266860234921473, "percentage": 85.34, "elapsed_time": "0:47:59", "remaining_time": "0:08:14", "throughput": 5527.88, "total_tokens": 15915792} +{"current_steps": 32335, "total_steps": 37885, "loss": 0.0, "lr": 1.2797261576968133e-07, "epoch": 4.267520126699221, "percentage": 85.35, "elapsed_time": "0:47:59", "remaining_time": "0:08:14", "throughput": 5527.98, "total_tokens": 15917968} +{"current_steps": 32340, "total_steps": 37885, "loss": 0.0, "lr": 1.2774721635565156e-07, "epoch": 4.268180018476969, "percentage": 85.36, "elapsed_time": "0:47:59", "remaining_time": "0:08:13", "throughput": 5528.26, "total_tokens": 15920656} +{"current_steps": 32345, "total_steps": 37885, "loss": 0.0, "lr": 1.275220020709591e-07, "epoch": 4.2688399102547185, "percentage": 85.38, "elapsed_time": "0:48:00", "remaining_time": "0:08:13", "throughput": 5528.43, "total_tokens": 15923024} +{"current_steps": 32350, "total_steps": 37885, "loss": 0.0503, "lr": 1.2729697296340358e-07, "epoch": 4.269499802032467, "percentage": 85.39, "elapsed_time": "0:48:00", "remaining_time": "0:08:12", "throughput": 5528.59, "total_tokens": 15925328} +{"current_steps": 32355, "total_steps": 37885, "loss": 0.0, "lr": 1.270721290807456e-07, "epoch": 4.270159693810215, "percentage": 85.4, "elapsed_time": "0:48:00", "remaining_time": "0:08:12", "throughput": 5528.79, "total_tokens": 15927760} +{"current_steps": 32360, "total_steps": 37885, "loss": 0.0, "lr": 1.268474704707073e-07, "epoch": 4.270819585587963, "percentage": 85.42, "elapsed_time": "0:48:01", "remaining_time": "0:08:11", "throughput": 5528.99, "total_tokens": 15930192} +{"current_steps": 32365, "total_steps": 37885, "loss": 0.0747, "lr": 1.2662299718097036e-07, "epoch": 4.271479477365712, "percentage": 85.43, "elapsed_time": "0:48:01", "remaining_time": "0:08:11", "throughput": 5529.1, "total_tokens": 15932368} +{"current_steps": 32370, "total_steps": 37885, "loss": 0.0, "lr": 1.2639870925917805e-07, "epoch": 4.272139369143461, "percentage": 85.44, "elapsed_time": "0:48:01", "remaining_time": "0:08:10", "throughput": 5529.35, "total_tokens": 15934928} +{"current_steps": 32375, "total_steps": 37885, "loss": 0.0, "lr": 1.2617460675293312e-07, "epoch": 4.272799260921209, "percentage": 85.46, "elapsed_time": "0:48:02", "remaining_time": "0:08:10", "throughput": 5529.51, "total_tokens": 15937232} +{"current_steps": 32380, "total_steps": 37885, "loss": 0.0, "lr": 1.259506897098005e-07, "epoch": 4.273459152698957, "percentage": 85.47, "elapsed_time": "0:48:02", "remaining_time": "0:08:10", "throughput": 5529.86, "total_tokens": 15940176} +{"current_steps": 32385, "total_steps": 37885, "loss": 0.0005, "lr": 1.2572695817730473e-07, "epoch": 4.274119044476706, "percentage": 85.48, "elapsed_time": "0:48:02", "remaining_time": "0:08:09", "throughput": 5530.05, "total_tokens": 15942608} +{"current_steps": 32390, "total_steps": 37885, "loss": 0.0, "lr": 1.2550341220293059e-07, "epoch": 4.274778936254454, "percentage": 85.5, "elapsed_time": "0:48:03", "remaining_time": "0:08:09", "throughput": 5530.32, "total_tokens": 15945296} +{"current_steps": 32395, "total_steps": 37885, "loss": 0.0, "lr": 1.2528005183412503e-07, "epoch": 4.275438828032203, "percentage": 85.51, "elapsed_time": "0:48:03", "remaining_time": "0:08:08", "throughput": 5530.58, "total_tokens": 15947920} +{"current_steps": 32400, "total_steps": 37885, "loss": 0.0, "lr": 1.2505687711829417e-07, "epoch": 4.276098719809951, "percentage": 85.52, "elapsed_time": "0:48:03", "remaining_time": "0:08:08", "throughput": 5530.86, "total_tokens": 15950672} +{"current_steps": 32405, "total_steps": 37885, "loss": 0.0016, "lr": 1.2483388810280538e-07, "epoch": 4.2767586115877, "percentage": 85.54, "elapsed_time": "0:48:04", "remaining_time": "0:08:07", "throughput": 5531.2, "total_tokens": 15953552} +{"current_steps": 32410, "total_steps": 37885, "loss": 0.0007, "lr": 1.2461108483498617e-07, "epoch": 4.277418503365448, "percentage": 85.55, "elapsed_time": "0:48:04", "remaining_time": "0:08:07", "throughput": 5531.38, "total_tokens": 15955920} +{"current_steps": 32415, "total_steps": 37885, "loss": 0.0, "lr": 1.2438846736212516e-07, "epoch": 4.278078395143196, "percentage": 85.56, "elapsed_time": "0:48:04", "remaining_time": "0:08:06", "throughput": 5531.63, "total_tokens": 15958544} +{"current_steps": 32420, "total_steps": 37885, "loss": 0.0, "lr": 1.2416603573147155e-07, "epoch": 4.278738286920945, "percentage": 85.57, "elapsed_time": "0:48:05", "remaining_time": "0:08:06", "throughput": 5531.89, "total_tokens": 15961168} +{"current_steps": 32425, "total_steps": 37885, "loss": 0.0, "lr": 1.2394378999023426e-07, "epoch": 4.279398178698694, "percentage": 85.59, "elapsed_time": "0:48:05", "remaining_time": "0:08:05", "throughput": 5532.02, "total_tokens": 15963408} +{"current_steps": 32430, "total_steps": 37885, "loss": 0.0, "lr": 1.2372173018558373e-07, "epoch": 4.280058070476442, "percentage": 85.6, "elapsed_time": "0:48:05", "remaining_time": "0:08:05", "throughput": 5532.34, "total_tokens": 15966224} +{"current_steps": 32435, "total_steps": 37885, "loss": 0.0002, "lr": 1.2349985636465054e-07, "epoch": 4.28071796225419, "percentage": 85.61, "elapsed_time": "0:48:06", "remaining_time": "0:08:04", "throughput": 5532.46, "total_tokens": 15968464} +{"current_steps": 32440, "total_steps": 37885, "loss": 0.0, "lr": 1.2327816857452567e-07, "epoch": 4.2813778540319385, "percentage": 85.63, "elapsed_time": "0:48:06", "remaining_time": "0:08:04", "throughput": 5532.77, "total_tokens": 15971280} +{"current_steps": 32445, "total_steps": 37885, "loss": 0.0, "lr": 1.230566668622607e-07, "epoch": 4.282037745809687, "percentage": 85.64, "elapsed_time": "0:48:07", "remaining_time": "0:08:04", "throughput": 5532.9, "total_tokens": 15973520} +{"current_steps": 32450, "total_steps": 37885, "loss": 0.0, "lr": 1.2283535127486789e-07, "epoch": 4.282697637587436, "percentage": 85.65, "elapsed_time": "0:48:07", "remaining_time": "0:08:03", "throughput": 5533.11, "total_tokens": 15976016} +{"current_steps": 32455, "total_steps": 37885, "loss": 0.0, "lr": 1.2261422185932003e-07, "epoch": 4.283357529365184, "percentage": 85.67, "elapsed_time": "0:48:07", "remaining_time": "0:08:03", "throughput": 5533.27, "total_tokens": 15978320} +{"current_steps": 32460, "total_steps": 37885, "loss": 0.0, "lr": 1.223932786625499e-07, "epoch": 4.2840174211429325, "percentage": 85.68, "elapsed_time": "0:48:08", "remaining_time": "0:08:02", "throughput": 5533.5, "total_tokens": 15980880} +{"current_steps": 32465, "total_steps": 37885, "loss": 0.0, "lr": 1.221725217314512e-07, "epoch": 4.284677312920681, "percentage": 85.69, "elapsed_time": "0:48:08", "remaining_time": "0:08:02", "throughput": 5533.69, "total_tokens": 15983312} +{"current_steps": 32470, "total_steps": 37885, "loss": 0.0017, "lr": 1.2195195111287827e-07, "epoch": 4.285337204698429, "percentage": 85.71, "elapsed_time": "0:48:08", "remaining_time": "0:08:01", "throughput": 5533.92, "total_tokens": 15985872} +{"current_steps": 32475, "total_steps": 37885, "loss": 0.0, "lr": 1.2173156685364516e-07, "epoch": 4.285997096476178, "percentage": 85.72, "elapsed_time": "0:48:09", "remaining_time": "0:08:01", "throughput": 5534.1, "total_tokens": 15988304} +{"current_steps": 32480, "total_steps": 37885, "loss": 0.024, "lr": 1.2151136900052706e-07, "epoch": 4.2866569882539265, "percentage": 85.73, "elapsed_time": "0:48:09", "remaining_time": "0:08:00", "throughput": 5534.28, "total_tokens": 15990672} +{"current_steps": 32485, "total_steps": 37885, "loss": 0.0008, "lr": 1.2129135760025955e-07, "epoch": 4.287316880031675, "percentage": 85.75, "elapsed_time": "0:48:09", "remaining_time": "0:08:00", "throughput": 5534.44, "total_tokens": 15993040} +{"current_steps": 32490, "total_steps": 37885, "loss": 0.0007, "lr": 1.2107153269953818e-07, "epoch": 4.287976771809423, "percentage": 85.76, "elapsed_time": "0:48:10", "remaining_time": "0:07:59", "throughput": 5534.72, "total_tokens": 15995792} +{"current_steps": 32495, "total_steps": 37885, "loss": 0.0, "lr": 1.208518943450192e-07, "epoch": 4.288636663587171, "percentage": 85.77, "elapsed_time": "0:48:10", "remaining_time": "0:07:59", "throughput": 5534.94, "total_tokens": 15998288} +{"current_steps": 32500, "total_steps": 37885, "loss": 0.0001, "lr": 1.2063244258331938e-07, "epoch": 4.2892965553649205, "percentage": 85.79, "elapsed_time": "0:48:10", "remaining_time": "0:07:58", "throughput": 5535.18, "total_tokens": 16000912} +{"current_steps": 32505, "total_steps": 37885, "loss": 0.0, "lr": 1.2041317746101599e-07, "epoch": 4.289956447142669, "percentage": 85.8, "elapsed_time": "0:48:11", "remaining_time": "0:07:58", "throughput": 5535.3, "total_tokens": 16003088} +{"current_steps": 32510, "total_steps": 37885, "loss": 0.0, "lr": 1.2019409902464616e-07, "epoch": 4.290616338920417, "percentage": 85.81, "elapsed_time": "0:48:11", "remaining_time": "0:07:58", "throughput": 5535.56, "total_tokens": 16005776} +{"current_steps": 32515, "total_steps": 37885, "loss": 0.0, "lr": 1.1997520732070742e-07, "epoch": 4.291276230698165, "percentage": 85.83, "elapsed_time": "0:48:11", "remaining_time": "0:07:57", "throughput": 5535.73, "total_tokens": 16008144} +{"current_steps": 32520, "total_steps": 37885, "loss": 0.0, "lr": 1.197565023956586e-07, "epoch": 4.291936122475914, "percentage": 85.84, "elapsed_time": "0:48:12", "remaining_time": "0:07:57", "throughput": 5535.98, "total_tokens": 16010768} +{"current_steps": 32525, "total_steps": 37885, "loss": 0.0, "lr": 1.1953798429591778e-07, "epoch": 4.292596014253663, "percentage": 85.85, "elapsed_time": "0:48:12", "remaining_time": "0:07:56", "throughput": 5536.16, "total_tokens": 16013200} +{"current_steps": 32530, "total_steps": 37885, "loss": 0.0002, "lr": 1.1931965306786396e-07, "epoch": 4.293255906031411, "percentage": 85.87, "elapsed_time": "0:48:12", "remaining_time": "0:07:56", "throughput": 5536.41, "total_tokens": 16015824} +{"current_steps": 32535, "total_steps": 37885, "loss": 0.0, "lr": 1.1910150875783664e-07, "epoch": 4.293915797809159, "percentage": 85.88, "elapsed_time": "0:48:13", "remaining_time": "0:07:55", "throughput": 5536.52, "total_tokens": 16018064} +{"current_steps": 32540, "total_steps": 37885, "loss": 0.0, "lr": 1.1888355141213491e-07, "epoch": 4.294575689586908, "percentage": 85.89, "elapsed_time": "0:48:13", "remaining_time": "0:07:55", "throughput": 5536.68, "total_tokens": 16020432} +{"current_steps": 32545, "total_steps": 37885, "loss": 0.0001, "lr": 1.1866578107701897e-07, "epoch": 4.295235581364656, "percentage": 85.9, "elapsed_time": "0:48:13", "remaining_time": "0:07:54", "throughput": 5536.93, "total_tokens": 16023056} +{"current_steps": 32550, "total_steps": 37885, "loss": 0.0001, "lr": 1.1844819779870862e-07, "epoch": 4.295895473142405, "percentage": 85.92, "elapsed_time": "0:48:14", "remaining_time": "0:07:54", "throughput": 5537.07, "total_tokens": 16025360} +{"current_steps": 32555, "total_steps": 37885, "loss": 0.0, "lr": 1.1823080162338483e-07, "epoch": 4.296555364920153, "percentage": 85.93, "elapsed_time": "0:48:14", "remaining_time": "0:07:53", "throughput": 5537.3, "total_tokens": 16027920} +{"current_steps": 32560, "total_steps": 37885, "loss": 0.0, "lr": 1.1801359259718823e-07, "epoch": 4.297215256697902, "percentage": 85.94, "elapsed_time": "0:48:14", "remaining_time": "0:07:53", "throughput": 5537.5, "total_tokens": 16030416} +{"current_steps": 32565, "total_steps": 37885, "loss": 0.0001, "lr": 1.1779657076621951e-07, "epoch": 4.29787514847565, "percentage": 85.96, "elapsed_time": "0:48:15", "remaining_time": "0:07:52", "throughput": 5537.66, "total_tokens": 16032784} +{"current_steps": 32570, "total_steps": 37885, "loss": 0.0, "lr": 1.1757973617654027e-07, "epoch": 4.298535040253398, "percentage": 85.97, "elapsed_time": "0:48:15", "remaining_time": "0:07:52", "throughput": 5537.86, "total_tokens": 16035216} +{"current_steps": 32575, "total_steps": 37885, "loss": 0.0, "lr": 1.1736308887417201e-07, "epoch": 4.299194932031147, "percentage": 85.98, "elapsed_time": "0:48:15", "remaining_time": "0:07:52", "throughput": 5538.02, "total_tokens": 16037584} +{"current_steps": 32580, "total_steps": 37885, "loss": 0.0001, "lr": 1.1714662890509685e-07, "epoch": 4.299854823808896, "percentage": 86.0, "elapsed_time": "0:48:16", "remaining_time": "0:07:51", "throughput": 5538.21, "total_tokens": 16040016} +{"current_steps": 32585, "total_steps": 37885, "loss": 0.0, "lr": 1.1693035631525628e-07, "epoch": 4.300514715586644, "percentage": 86.01, "elapsed_time": "0:48:16", "remaining_time": "0:07:51", "throughput": 5538.46, "total_tokens": 16042640} +{"current_steps": 32590, "total_steps": 37885, "loss": 0.0, "lr": 1.1671427115055299e-07, "epoch": 4.301174607364392, "percentage": 86.02, "elapsed_time": "0:48:16", "remaining_time": "0:07:50", "throughput": 5538.66, "total_tokens": 16045136} +{"current_steps": 32595, "total_steps": 37885, "loss": 0.0006, "lr": 1.1649837345684954e-07, "epoch": 4.3018344991421404, "percentage": 86.04, "elapsed_time": "0:48:17", "remaining_time": "0:07:50", "throughput": 5538.89, "total_tokens": 16047696} +{"current_steps": 32600, "total_steps": 37885, "loss": 0.0004, "lr": 1.1628266327996827e-07, "epoch": 4.302494390919889, "percentage": 86.05, "elapsed_time": "0:48:17", "remaining_time": "0:07:49", "throughput": 5539.04, "total_tokens": 16050000} +{"current_steps": 32605, "total_steps": 37885, "loss": 0.0003, "lr": 1.1606714066569235e-07, "epoch": 4.303154282697638, "percentage": 86.06, "elapsed_time": "0:48:17", "remaining_time": "0:07:49", "throughput": 5539.29, "total_tokens": 16052624} +{"current_steps": 32610, "total_steps": 37885, "loss": 0.0361, "lr": 1.1585180565976515e-07, "epoch": 4.303814174475386, "percentage": 86.08, "elapsed_time": "0:48:18", "remaining_time": "0:07:48", "throughput": 5539.42, "total_tokens": 16054864} +{"current_steps": 32615, "total_steps": 37885, "loss": 0.0, "lr": 1.1563665830788948e-07, "epoch": 4.3044740662531344, "percentage": 86.09, "elapsed_time": "0:48:18", "remaining_time": "0:07:48", "throughput": 5539.53, "total_tokens": 16057104} +{"current_steps": 32620, "total_steps": 37885, "loss": 0.0001, "lr": 1.1542169865572904e-07, "epoch": 4.305133958030883, "percentage": 86.1, "elapsed_time": "0:48:18", "remaining_time": "0:07:47", "throughput": 5539.7, "total_tokens": 16059472} +{"current_steps": 32625, "total_steps": 37885, "loss": 0.0002, "lr": 1.1520692674890741e-07, "epoch": 4.305793849808631, "percentage": 86.12, "elapsed_time": "0:48:19", "remaining_time": "0:07:47", "throughput": 5539.82, "total_tokens": 16061712} +{"current_steps": 32630, "total_steps": 37885, "loss": 0.0001, "lr": 1.149923426330086e-07, "epoch": 4.30645374158638, "percentage": 86.13, "elapsed_time": "0:48:19", "remaining_time": "0:07:46", "throughput": 5539.96, "total_tokens": 16064016} +{"current_steps": 32635, "total_steps": 37885, "loss": 0.0001, "lr": 1.1477794635357618e-07, "epoch": 4.3071136333641284, "percentage": 86.14, "elapsed_time": "0:48:19", "remaining_time": "0:07:46", "throughput": 5540.07, "total_tokens": 16066192} +{"current_steps": 32640, "total_steps": 37885, "loss": 0.0, "lr": 1.145637379561144e-07, "epoch": 4.307773525141877, "percentage": 86.16, "elapsed_time": "0:48:20", "remaining_time": "0:07:46", "throughput": 5540.18, "total_tokens": 16068368} +{"current_steps": 32645, "total_steps": 37885, "loss": 0.0002, "lr": 1.1434971748608757e-07, "epoch": 4.308433416919625, "percentage": 86.17, "elapsed_time": "0:48:20", "remaining_time": "0:07:45", "throughput": 5540.23, "total_tokens": 16070416} +{"current_steps": 32650, "total_steps": 37885, "loss": 0.0, "lr": 1.1413588498891957e-07, "epoch": 4.309093308697373, "percentage": 86.18, "elapsed_time": "0:48:21", "remaining_time": "0:07:45", "throughput": 5540.4, "total_tokens": 16072784} +{"current_steps": 32655, "total_steps": 37885, "loss": 0.0001, "lr": 1.139222405099951e-07, "epoch": 4.3097532004751224, "percentage": 86.2, "elapsed_time": "0:48:21", "remaining_time": "0:07:44", "throughput": 5540.62, "total_tokens": 16075280} +{"current_steps": 32660, "total_steps": 37885, "loss": 0.0, "lr": 1.137087840946589e-07, "epoch": 4.310413092252871, "percentage": 86.21, "elapsed_time": "0:48:21", "remaining_time": "0:07:44", "throughput": 5540.91, "total_tokens": 16078032} +{"current_steps": 32665, "total_steps": 37885, "loss": 0.0133, "lr": 1.1349551578821493e-07, "epoch": 4.311072984030619, "percentage": 86.22, "elapsed_time": "0:48:22", "remaining_time": "0:07:43", "throughput": 5541.09, "total_tokens": 16080464} +{"current_steps": 32670, "total_steps": 37885, "loss": 0.0, "lr": 1.1328243563592831e-07, "epoch": 4.311732875808367, "percentage": 86.23, "elapsed_time": "0:48:22", "remaining_time": "0:07:43", "throughput": 5541.29, "total_tokens": 16082960} +{"current_steps": 32675, "total_steps": 37885, "loss": 0.0, "lr": 1.1306954368302357e-07, "epoch": 4.312392767586116, "percentage": 86.25, "elapsed_time": "0:48:22", "remaining_time": "0:07:42", "throughput": 5541.5, "total_tokens": 16085456} +{"current_steps": 32680, "total_steps": 37885, "loss": 0.0015, "lr": 1.1285683997468564e-07, "epoch": 4.313052659363865, "percentage": 86.26, "elapsed_time": "0:48:23", "remaining_time": "0:07:42", "throughput": 5541.56, "total_tokens": 16087504} +{"current_steps": 32685, "total_steps": 37885, "loss": 0.0, "lr": 1.1264432455605933e-07, "epoch": 4.313712551141613, "percentage": 86.27, "elapsed_time": "0:48:23", "remaining_time": "0:07:41", "throughput": 5541.75, "total_tokens": 16089936} +{"current_steps": 32690, "total_steps": 37885, "loss": 0.0, "lr": 1.1243199747224897e-07, "epoch": 4.314372442919361, "percentage": 86.29, "elapsed_time": "0:48:23", "remaining_time": "0:07:41", "throughput": 5541.85, "total_tokens": 16092112} +{"current_steps": 32695, "total_steps": 37885, "loss": 0.0, "lr": 1.122198587683203e-07, "epoch": 4.31503233469711, "percentage": 86.3, "elapsed_time": "0:48:24", "remaining_time": "0:07:40", "throughput": 5542.04, "total_tokens": 16094544} +{"current_steps": 32700, "total_steps": 37885, "loss": 0.0239, "lr": 1.1200790848929764e-07, "epoch": 4.315692226474858, "percentage": 86.31, "elapsed_time": "0:48:24", "remaining_time": "0:07:40", "throughput": 5542.19, "total_tokens": 16096848} +{"current_steps": 32705, "total_steps": 37885, "loss": 0.0, "lr": 1.1179614668016624e-07, "epoch": 4.316352118252606, "percentage": 86.33, "elapsed_time": "0:48:24", "remaining_time": "0:07:40", "throughput": 5542.29, "total_tokens": 16099024} +{"current_steps": 32710, "total_steps": 37885, "loss": 0.0144, "lr": 1.1158457338587047e-07, "epoch": 4.317012010030355, "percentage": 86.34, "elapsed_time": "0:48:25", "remaining_time": "0:07:39", "throughput": 5542.59, "total_tokens": 16101776} +{"current_steps": 32715, "total_steps": 37885, "loss": 0.0, "lr": 1.1137318865131595e-07, "epoch": 4.317671901808104, "percentage": 86.35, "elapsed_time": "0:48:25", "remaining_time": "0:07:39", "throughput": 5542.75, "total_tokens": 16104144} +{"current_steps": 32720, "total_steps": 37885, "loss": 0.0, "lr": 1.1116199252136727e-07, "epoch": 4.318331793585852, "percentage": 86.37, "elapsed_time": "0:48:25", "remaining_time": "0:07:38", "throughput": 5542.91, "total_tokens": 16106512} +{"current_steps": 32725, "total_steps": 37885, "loss": 0.0427, "lr": 1.1095098504084877e-07, "epoch": 4.3189916853636, "percentage": 86.38, "elapsed_time": "0:48:26", "remaining_time": "0:07:38", "throughput": 5543.09, "total_tokens": 16108944} +{"current_steps": 32730, "total_steps": 37885, "loss": 0.0001, "lr": 1.1074016625454607e-07, "epoch": 4.319651577141348, "percentage": 86.39, "elapsed_time": "0:48:26", "remaining_time": "0:07:37", "throughput": 5543.26, "total_tokens": 16111312} +{"current_steps": 32735, "total_steps": 37885, "loss": 0.028, "lr": 1.1052953620720351e-07, "epoch": 4.320311468919098, "percentage": 86.41, "elapsed_time": "0:48:26", "remaining_time": "0:07:37", "throughput": 5543.53, "total_tokens": 16114000} +{"current_steps": 32740, "total_steps": 37885, "loss": 0.0322, "lr": 1.1031909494352588e-07, "epoch": 4.320971360696846, "percentage": 86.42, "elapsed_time": "0:48:27", "remaining_time": "0:07:36", "throughput": 5543.61, "total_tokens": 16116112} +{"current_steps": 32745, "total_steps": 37885, "loss": 0.0533, "lr": 1.1010884250817765e-07, "epoch": 4.321631252474594, "percentage": 86.43, "elapsed_time": "0:48:27", "remaining_time": "0:07:36", "throughput": 5543.79, "total_tokens": 16118544} +{"current_steps": 32750, "total_steps": 37885, "loss": 0.0, "lr": 1.098987789457836e-07, "epoch": 4.322291144252342, "percentage": 86.45, "elapsed_time": "0:48:27", "remaining_time": "0:07:35", "throughput": 5543.98, "total_tokens": 16120976} +{"current_steps": 32755, "total_steps": 37885, "loss": 0.0096, "lr": 1.0968890430092825e-07, "epoch": 4.322951036030091, "percentage": 86.46, "elapsed_time": "0:48:28", "remaining_time": "0:07:35", "throughput": 5544.23, "total_tokens": 16123600} +{"current_steps": 32760, "total_steps": 37885, "loss": 0.0, "lr": 1.0947921861815557e-07, "epoch": 4.32361092780784, "percentage": 86.47, "elapsed_time": "0:48:28", "remaining_time": "0:07:35", "throughput": 5544.35, "total_tokens": 16125840} +{"current_steps": 32765, "total_steps": 37885, "loss": 0.0518, "lr": 1.0926972194197015e-07, "epoch": 4.324270819585588, "percentage": 86.49, "elapsed_time": "0:48:28", "remaining_time": "0:07:34", "throughput": 5544.56, "total_tokens": 16128336} +{"current_steps": 32770, "total_steps": 37885, "loss": 0.0, "lr": 1.0906041431683632e-07, "epoch": 4.324930711363336, "percentage": 86.5, "elapsed_time": "0:48:29", "remaining_time": "0:07:34", "throughput": 5544.82, "total_tokens": 16131024} +{"current_steps": 32775, "total_steps": 37885, "loss": 0.0, "lr": 1.0885129578717767e-07, "epoch": 4.325590603141085, "percentage": 86.51, "elapsed_time": "0:48:29", "remaining_time": "0:07:33", "throughput": 5545.09, "total_tokens": 16133712} +{"current_steps": 32780, "total_steps": 37885, "loss": 0.0, "lr": 1.0864236639737823e-07, "epoch": 4.326250494918833, "percentage": 86.53, "elapsed_time": "0:48:29", "remaining_time": "0:07:33", "throughput": 5545.29, "total_tokens": 16136208} +{"current_steps": 32785, "total_steps": 37885, "loss": 0.0, "lr": 1.0843362619178187e-07, "epoch": 4.326910386696582, "percentage": 86.54, "elapsed_time": "0:48:30", "remaining_time": "0:07:32", "throughput": 5545.45, "total_tokens": 16138576} +{"current_steps": 32790, "total_steps": 37885, "loss": 0.0014, "lr": 1.0822507521469227e-07, "epoch": 4.32757027847433, "percentage": 86.55, "elapsed_time": "0:48:30", "remaining_time": "0:07:32", "throughput": 5545.66, "total_tokens": 16141072} +{"current_steps": 32795, "total_steps": 37885, "loss": 0.0, "lr": 1.0801671351037255e-07, "epoch": 4.328230170252079, "percentage": 86.56, "elapsed_time": "0:48:30", "remaining_time": "0:07:31", "throughput": 5545.88, "total_tokens": 16143632} +{"current_steps": 32800, "total_steps": 37885, "loss": 0.0018, "lr": 1.0780854112304626e-07, "epoch": 4.328890062029827, "percentage": 86.58, "elapsed_time": "0:48:31", "remaining_time": "0:07:31", "throughput": 5546.13, "total_tokens": 16146320} +{"current_steps": 32805, "total_steps": 37885, "loss": 0.0, "lr": 1.076005580968965e-07, "epoch": 4.329549953807575, "percentage": 86.59, "elapsed_time": "0:48:31", "remaining_time": "0:07:30", "throughput": 5546.4, "total_tokens": 16149008} +{"current_steps": 32810, "total_steps": 37885, "loss": 0.0, "lr": 1.0739276447606582e-07, "epoch": 4.330209845585324, "percentage": 86.6, "elapsed_time": "0:48:31", "remaining_time": "0:07:30", "throughput": 5546.59, "total_tokens": 16151504} +{"current_steps": 32815, "total_steps": 37885, "loss": 0.0, "lr": 1.0718516030465708e-07, "epoch": 4.330869737363073, "percentage": 86.62, "elapsed_time": "0:48:32", "remaining_time": "0:07:29", "throughput": 5546.91, "total_tokens": 16154320} +{"current_steps": 32820, "total_steps": 37885, "loss": 0.0, "lr": 1.0697774562673312e-07, "epoch": 4.331529629140821, "percentage": 86.63, "elapsed_time": "0:48:32", "remaining_time": "0:07:29", "throughput": 5547.11, "total_tokens": 16156816} +{"current_steps": 32825, "total_steps": 37885, "loss": 0.0, "lr": 1.0677052048631563e-07, "epoch": 4.332189520918569, "percentage": 86.64, "elapsed_time": "0:48:32", "remaining_time": "0:07:29", "throughput": 5547.22, "total_tokens": 16158992} +{"current_steps": 32830, "total_steps": 37885, "loss": 0.0, "lr": 1.0656348492738687e-07, "epoch": 4.3328494126963175, "percentage": 86.66, "elapsed_time": "0:48:33", "remaining_time": "0:07:28", "throughput": 5547.37, "total_tokens": 16161296} +{"current_steps": 32835, "total_steps": 37885, "loss": 0.0, "lr": 1.0635663899388881e-07, "epoch": 4.333509304474067, "percentage": 86.67, "elapsed_time": "0:48:33", "remaining_time": "0:07:28", "throughput": 5547.53, "total_tokens": 16163664} +{"current_steps": 32840, "total_steps": 37885, "loss": 0.0533, "lr": 1.0614998272972298e-07, "epoch": 4.334169196251815, "percentage": 86.68, "elapsed_time": "0:48:34", "remaining_time": "0:07:27", "throughput": 5547.64, "total_tokens": 16165840} +{"current_steps": 32845, "total_steps": 37885, "loss": 0.0683, "lr": 1.0594351617875053e-07, "epoch": 4.334829088029563, "percentage": 86.7, "elapsed_time": "0:48:34", "remaining_time": "0:07:27", "throughput": 5547.8, "total_tokens": 16168208} +{"current_steps": 32850, "total_steps": 37885, "loss": 0.0, "lr": 1.0573723938479217e-07, "epoch": 4.3354889798073115, "percentage": 86.71, "elapsed_time": "0:48:34", "remaining_time": "0:07:26", "throughput": 5547.99, "total_tokens": 16170640} +{"current_steps": 32855, "total_steps": 37885, "loss": 0.0, "lr": 1.0553115239162935e-07, "epoch": 4.33614887158506, "percentage": 86.72, "elapsed_time": "0:48:35", "remaining_time": "0:07:26", "throughput": 5548.11, "total_tokens": 16172880} +{"current_steps": 32860, "total_steps": 37885, "loss": 0.0, "lr": 1.0532525524300206e-07, "epoch": 4.336808763362809, "percentage": 86.74, "elapsed_time": "0:48:35", "remaining_time": "0:07:25", "throughput": 5548.28, "total_tokens": 16175248} +{"current_steps": 32865, "total_steps": 37885, "loss": 0.0, "lr": 1.0511954798261058e-07, "epoch": 4.337468655140557, "percentage": 86.75, "elapsed_time": "0:48:35", "remaining_time": "0:07:25", "throughput": 5548.48, "total_tokens": 16177680} +{"current_steps": 32870, "total_steps": 37885, "loss": 0.0472, "lr": 1.0491403065411508e-07, "epoch": 4.3381285469183055, "percentage": 86.76, "elapsed_time": "0:48:36", "remaining_time": "0:07:24", "throughput": 5548.65, "total_tokens": 16180048} +{"current_steps": 32875, "total_steps": 37885, "loss": 0.0, "lr": 1.0470870330113457e-07, "epoch": 4.338788438696054, "percentage": 86.78, "elapsed_time": "0:48:36", "remaining_time": "0:07:24", "throughput": 5548.82, "total_tokens": 16182416} +{"current_steps": 32880, "total_steps": 37885, "loss": 0.1348, "lr": 1.0450356596724886e-07, "epoch": 4.339448330473802, "percentage": 86.79, "elapsed_time": "0:48:36", "remaining_time": "0:07:23", "throughput": 5549.02, "total_tokens": 16184848} +{"current_steps": 32885, "total_steps": 37885, "loss": 0.0, "lr": 1.0429861869599622e-07, "epoch": 4.34010822225155, "percentage": 86.8, "elapsed_time": "0:48:37", "remaining_time": "0:07:23", "throughput": 5549.21, "total_tokens": 16187280} +{"current_steps": 32890, "total_steps": 37885, "loss": 0.0004, "lr": 1.0409386153087596e-07, "epoch": 4.3407681140292995, "percentage": 86.82, "elapsed_time": "0:48:37", "remaining_time": "0:07:23", "throughput": 5549.35, "total_tokens": 16189584} +{"current_steps": 32895, "total_steps": 37885, "loss": 0.0, "lr": 1.0388929451534601e-07, "epoch": 4.341428005807048, "percentage": 86.83, "elapsed_time": "0:48:37", "remaining_time": "0:07:22", "throughput": 5549.46, "total_tokens": 16191760} +{"current_steps": 32900, "total_steps": 37885, "loss": 0.0, "lr": 1.0368491769282395e-07, "epoch": 4.342087897584796, "percentage": 86.84, "elapsed_time": "0:48:38", "remaining_time": "0:07:22", "throughput": 5549.64, "total_tokens": 16194128} +{"current_steps": 32905, "total_steps": 37885, "loss": 0.0, "lr": 1.0348073110668743e-07, "epoch": 4.342747789362544, "percentage": 86.85, "elapsed_time": "0:48:38", "remaining_time": "0:07:21", "throughput": 5549.89, "total_tokens": 16196752} +{"current_steps": 32910, "total_steps": 37885, "loss": 0.0, "lr": 1.0327673480027377e-07, "epoch": 4.343407681140293, "percentage": 86.87, "elapsed_time": "0:48:38", "remaining_time": "0:07:21", "throughput": 5550.1, "total_tokens": 16199248} +{"current_steps": 32915, "total_steps": 37885, "loss": 0.0002, "lr": 1.0307292881687968e-07, "epoch": 4.344067572918042, "percentage": 86.88, "elapsed_time": "0:48:39", "remaining_time": "0:07:20", "throughput": 5550.34, "total_tokens": 16201808} +{"current_steps": 32920, "total_steps": 37885, "loss": 0.0, "lr": 1.0286931319976133e-07, "epoch": 4.34472746469579, "percentage": 86.89, "elapsed_time": "0:48:39", "remaining_time": "0:07:20", "throughput": 5550.55, "total_tokens": 16204304} +{"current_steps": 32925, "total_steps": 37885, "loss": 0.0, "lr": 1.026658879921346e-07, "epoch": 4.345387356473538, "percentage": 86.91, "elapsed_time": "0:48:39", "remaining_time": "0:07:19", "throughput": 5550.78, "total_tokens": 16206864} +{"current_steps": 32930, "total_steps": 37885, "loss": 0.0061, "lr": 1.024626532371755e-07, "epoch": 4.346047248251287, "percentage": 86.92, "elapsed_time": "0:48:40", "remaining_time": "0:07:19", "throughput": 5550.9, "total_tokens": 16209104} +{"current_steps": 32935, "total_steps": 37885, "loss": 0.0001, "lr": 1.0225960897801856e-07, "epoch": 4.346707140029035, "percentage": 86.93, "elapsed_time": "0:48:40", "remaining_time": "0:07:18", "throughput": 5551.09, "total_tokens": 16211536} +{"current_steps": 32940, "total_steps": 37885, "loss": 0.0, "lr": 1.0205675525775858e-07, "epoch": 4.347367031806784, "percentage": 86.95, "elapsed_time": "0:48:40", "remaining_time": "0:07:18", "throughput": 5551.23, "total_tokens": 16213840} +{"current_steps": 32945, "total_steps": 37885, "loss": 0.0, "lr": 1.0185409211945017e-07, "epoch": 4.348026923584532, "percentage": 86.96, "elapsed_time": "0:48:41", "remaining_time": "0:07:18", "throughput": 5551.38, "total_tokens": 16216144} +{"current_steps": 32950, "total_steps": 37885, "loss": 0.0, "lr": 1.0165161960610669e-07, "epoch": 4.348686815362281, "percentage": 86.97, "elapsed_time": "0:48:41", "remaining_time": "0:07:17", "throughput": 5551.55, "total_tokens": 16218512} +{"current_steps": 32955, "total_steps": 37885, "loss": 0.0, "lr": 1.0144933776070163e-07, "epoch": 4.349346707140029, "percentage": 86.99, "elapsed_time": "0:48:41", "remaining_time": "0:07:17", "throughput": 5551.82, "total_tokens": 16221200} +{"current_steps": 32960, "total_steps": 37885, "loss": 0.0, "lr": 1.012472466261678e-07, "epoch": 4.350006598917777, "percentage": 87.0, "elapsed_time": "0:48:42", "remaining_time": "0:07:16", "throughput": 5552.01, "total_tokens": 16223632} +{"current_steps": 32965, "total_steps": 37885, "loss": 0.001, "lr": 1.0104534624539785e-07, "epoch": 4.3506664906955255, "percentage": 87.01, "elapsed_time": "0:48:42", "remaining_time": "0:07:16", "throughput": 5552.24, "total_tokens": 16226192} +{"current_steps": 32970, "total_steps": 37885, "loss": 0.0002, "lr": 1.0084363666124318e-07, "epoch": 4.351326382473275, "percentage": 87.03, "elapsed_time": "0:48:42", "remaining_time": "0:07:15", "throughput": 5552.36, "total_tokens": 16228432} +{"current_steps": 32975, "total_steps": 37885, "loss": 0.0, "lr": 1.0064211791651544e-07, "epoch": 4.351986274251023, "percentage": 87.04, "elapsed_time": "0:48:43", "remaining_time": "0:07:15", "throughput": 5552.51, "total_tokens": 16230736} +{"current_steps": 32980, "total_steps": 37885, "loss": 0.0001, "lr": 1.0044079005398576e-07, "epoch": 4.352646166028771, "percentage": 87.05, "elapsed_time": "0:48:43", "remaining_time": "0:07:14", "throughput": 5552.62, "total_tokens": 16232976} +{"current_steps": 32985, "total_steps": 37885, "loss": 0.0, "lr": 1.0023965311638415e-07, "epoch": 4.3533060578065195, "percentage": 87.07, "elapsed_time": "0:48:43", "remaining_time": "0:07:14", "throughput": 5552.81, "total_tokens": 16235408} +{"current_steps": 32990, "total_steps": 37885, "loss": 0.0, "lr": 1.0003870714640061e-07, "epoch": 4.353965949584268, "percentage": 87.08, "elapsed_time": "0:48:44", "remaining_time": "0:07:13", "throughput": 5553.06, "total_tokens": 16238032} +{"current_steps": 32995, "total_steps": 37885, "loss": 0.002, "lr": 9.983795218668456e-08, "epoch": 4.354625841362017, "percentage": 87.09, "elapsed_time": "0:48:44", "remaining_time": "0:07:13", "throughput": 5553.41, "total_tokens": 16240976} +{"current_steps": 33000, "total_steps": 37885, "loss": 0.0384, "lr": 9.963738827984458e-08, "epoch": 4.355285733139765, "percentage": 87.11, "elapsed_time": "0:48:44", "remaining_time": "0:07:12", "throughput": 5553.48, "total_tokens": 16243088} +{"current_steps": 33005, "total_steps": 37885, "loss": 0.0, "lr": 9.943701546844906e-08, "epoch": 4.3559456249175135, "percentage": 87.12, "elapsed_time": "0:48:45", "remaining_time": "0:07:12", "throughput": 5553.67, "total_tokens": 16245520} +{"current_steps": 33010, "total_steps": 37885, "loss": 0.0337, "lr": 9.923683379502557e-08, "epoch": 4.356605516695262, "percentage": 87.13, "elapsed_time": "0:48:45", "remaining_time": "0:07:12", "throughput": 5553.88, "total_tokens": 16248016} +{"current_steps": 33015, "total_steps": 37885, "loss": 0.0, "lr": 9.903684330206152e-08, "epoch": 4.35726540847301, "percentage": 87.15, "elapsed_time": "0:48:45", "remaining_time": "0:07:11", "throughput": 5554.02, "total_tokens": 16250320} +{"current_steps": 33020, "total_steps": 37885, "loss": 0.0, "lr": 9.8837044032003e-08, "epoch": 4.357925300250759, "percentage": 87.16, "elapsed_time": "0:48:46", "remaining_time": "0:07:11", "throughput": 5554.31, "total_tokens": 16253072} +{"current_steps": 33025, "total_steps": 37885, "loss": 0.0, "lr": 9.863743602725627e-08, "epoch": 4.3585851920285075, "percentage": 87.17, "elapsed_time": "0:48:46", "remaining_time": "0:07:10", "throughput": 5554.56, "total_tokens": 16255696} +{"current_steps": 33030, "total_steps": 37885, "loss": 0.0, "lr": 9.843801933018669e-08, "epoch": 4.359245083806256, "percentage": 87.18, "elapsed_time": "0:48:46", "remaining_time": "0:07:10", "throughput": 5554.78, "total_tokens": 16258256} +{"current_steps": 33035, "total_steps": 37885, "loss": 0.0, "lr": 9.823879398311874e-08, "epoch": 4.359904975584004, "percentage": 87.2, "elapsed_time": "0:48:47", "remaining_time": "0:07:09", "throughput": 5554.99, "total_tokens": 16260752} +{"current_steps": 33040, "total_steps": 37885, "loss": 0.0226, "lr": 9.803976002833692e-08, "epoch": 4.360564867361752, "percentage": 87.21, "elapsed_time": "0:48:47", "remaining_time": "0:07:09", "throughput": 5555.26, "total_tokens": 16263440} +{"current_steps": 33045, "total_steps": 37885, "loss": 0.0009, "lr": 9.78409175080841e-08, "epoch": 4.3612247591395015, "percentage": 87.22, "elapsed_time": "0:48:47", "remaining_time": "0:07:08", "throughput": 5555.49, "total_tokens": 16266000} +{"current_steps": 33050, "total_steps": 37885, "loss": 0.0, "lr": 9.764226646456408e-08, "epoch": 4.36188465091725, "percentage": 87.24, "elapsed_time": "0:48:48", "remaining_time": "0:07:08", "throughput": 5555.73, "total_tokens": 16268624} +{"current_steps": 33055, "total_steps": 37885, "loss": 0.0, "lr": 9.744380693993858e-08, "epoch": 4.362544542694998, "percentage": 87.25, "elapsed_time": "0:48:48", "remaining_time": "0:07:07", "throughput": 5555.89, "total_tokens": 16270992} +{"current_steps": 33060, "total_steps": 37885, "loss": 0.0, "lr": 9.724553897632893e-08, "epoch": 4.363204434472746, "percentage": 87.26, "elapsed_time": "0:48:48", "remaining_time": "0:07:07", "throughput": 5556.08, "total_tokens": 16273424} +{"current_steps": 33065, "total_steps": 37885, "loss": 0.0441, "lr": 9.704746261581675e-08, "epoch": 4.363864326250495, "percentage": 87.28, "elapsed_time": "0:48:49", "remaining_time": "0:07:07", "throughput": 5556.23, "total_tokens": 16275728} +{"current_steps": 33070, "total_steps": 37885, "loss": 0.0, "lr": 9.684957790044179e-08, "epoch": 4.364524218028244, "percentage": 87.29, "elapsed_time": "0:48:49", "remaining_time": "0:07:06", "throughput": 5556.33, "total_tokens": 16277904} +{"current_steps": 33075, "total_steps": 37885, "loss": 0.0001, "lr": 9.665188487220399e-08, "epoch": 4.365184109805992, "percentage": 87.3, "elapsed_time": "0:48:49", "remaining_time": "0:07:06", "throughput": 5556.64, "total_tokens": 16280720} +{"current_steps": 33080, "total_steps": 37885, "loss": 0.0, "lr": 9.64543835730619e-08, "epoch": 4.36584400158374, "percentage": 87.32, "elapsed_time": "0:48:50", "remaining_time": "0:07:05", "throughput": 5556.8, "total_tokens": 16283088} +{"current_steps": 33085, "total_steps": 37885, "loss": 0.0, "lr": 9.625707404493399e-08, "epoch": 4.366503893361489, "percentage": 87.33, "elapsed_time": "0:48:50", "remaining_time": "0:07:05", "throughput": 5556.99, "total_tokens": 16285520} +{"current_steps": 33090, "total_steps": 37885, "loss": 0.0001, "lr": 9.605995632969787e-08, "epoch": 4.367163785139237, "percentage": 87.34, "elapsed_time": "0:48:50", "remaining_time": "0:07:04", "throughput": 5557.16, "total_tokens": 16287888} +{"current_steps": 33095, "total_steps": 37885, "loss": 0.0226, "lr": 9.586303046919008e-08, "epoch": 4.367823676916986, "percentage": 87.36, "elapsed_time": "0:48:51", "remaining_time": "0:07:04", "throughput": 5557.32, "total_tokens": 16290256} +{"current_steps": 33100, "total_steps": 37885, "loss": 0.0, "lr": 9.566629650520675e-08, "epoch": 4.368483568694734, "percentage": 87.37, "elapsed_time": "0:48:51", "remaining_time": "0:07:03", "throughput": 5557.45, "total_tokens": 16292496} +{"current_steps": 33105, "total_steps": 37885, "loss": 0.0001, "lr": 9.546975447950345e-08, "epoch": 4.369143460472483, "percentage": 87.38, "elapsed_time": "0:48:51", "remaining_time": "0:07:03", "throughput": 5557.64, "total_tokens": 16294864} +{"current_steps": 33110, "total_steps": 37885, "loss": 0.0, "lr": 9.527340443379461e-08, "epoch": 4.369803352250231, "percentage": 87.4, "elapsed_time": "0:48:52", "remaining_time": "0:07:02", "throughput": 5557.94, "total_tokens": 16297616} +{"current_steps": 33115, "total_steps": 37885, "loss": 0.0, "lr": 9.507724640975412e-08, "epoch": 4.370463244027979, "percentage": 87.41, "elapsed_time": "0:48:52", "remaining_time": "0:07:02", "throughput": 5558.14, "total_tokens": 16300048} +{"current_steps": 33120, "total_steps": 37885, "loss": 0.0, "lr": 9.488128044901511e-08, "epoch": 4.371123135805728, "percentage": 87.42, "elapsed_time": "0:48:52", "remaining_time": "0:07:01", "throughput": 5558.39, "total_tokens": 16302608} +{"current_steps": 33125, "total_steps": 37885, "loss": 0.0715, "lr": 9.468550659317009e-08, "epoch": 4.371783027583477, "percentage": 87.44, "elapsed_time": "0:48:53", "remaining_time": "0:07:01", "throughput": 5558.66, "total_tokens": 16305232} +{"current_steps": 33130, "total_steps": 37885, "loss": 0.028, "lr": 9.44899248837705e-08, "epoch": 4.372442919361225, "percentage": 87.45, "elapsed_time": "0:48:53", "remaining_time": "0:07:01", "throughput": 5558.83, "total_tokens": 16307536} +{"current_steps": 33135, "total_steps": 37885, "loss": 0.0003, "lr": 9.4294535362327e-08, "epoch": 4.373102811138973, "percentage": 87.46, "elapsed_time": "0:48:53", "remaining_time": "0:07:00", "throughput": 5559.07, "total_tokens": 16310160} +{"current_steps": 33140, "total_steps": 37885, "loss": 0.0001, "lr": 9.409933807031012e-08, "epoch": 4.373762702916721, "percentage": 87.48, "elapsed_time": "0:48:54", "remaining_time": "0:07:00", "throughput": 5559.39, "total_tokens": 16312976} +{"current_steps": 33145, "total_steps": 37885, "loss": 0.0, "lr": 9.390433304914846e-08, "epoch": 4.37442259469447, "percentage": 87.49, "elapsed_time": "0:48:54", "remaining_time": "0:06:59", "throughput": 5559.54, "total_tokens": 16315216} +{"current_steps": 33150, "total_steps": 37885, "loss": 0.0502, "lr": 9.370952034023061e-08, "epoch": 4.375082486472219, "percentage": 87.5, "elapsed_time": "0:48:54", "remaining_time": "0:06:59", "throughput": 5559.73, "total_tokens": 16317584} +{"current_steps": 33155, "total_steps": 37885, "loss": 0.0, "lr": 9.351489998490447e-08, "epoch": 4.375742378249967, "percentage": 87.51, "elapsed_time": "0:48:55", "remaining_time": "0:06:58", "throughput": 5559.92, "total_tokens": 16319952} +{"current_steps": 33160, "total_steps": 37885, "loss": 0.0, "lr": 9.332047202447635e-08, "epoch": 4.376402270027715, "percentage": 87.53, "elapsed_time": "0:48:55", "remaining_time": "0:06:58", "throughput": 5560.18, "total_tokens": 16322576} +{"current_steps": 33165, "total_steps": 37885, "loss": 0.0, "lr": 9.312623650021245e-08, "epoch": 4.377062161805464, "percentage": 87.54, "elapsed_time": "0:48:55", "remaining_time": "0:06:57", "throughput": 5560.48, "total_tokens": 16325328} +{"current_steps": 33170, "total_steps": 37885, "loss": 0.0001, "lr": 9.29321934533378e-08, "epoch": 4.377722053583212, "percentage": 87.55, "elapsed_time": "0:48:56", "remaining_time": "0:06:57", "throughput": 5560.63, "total_tokens": 16327568} +{"current_steps": 33175, "total_steps": 37885, "loss": 0.0, "lr": 9.273834292503668e-08, "epoch": 4.378381945360961, "percentage": 87.57, "elapsed_time": "0:48:56", "remaining_time": "0:06:56", "throughput": 5560.95, "total_tokens": 16330384} +{"current_steps": 33180, "total_steps": 37885, "loss": 0.0, "lr": 9.254468495645251e-08, "epoch": 4.379041837138709, "percentage": 87.58, "elapsed_time": "0:48:56", "remaining_time": "0:06:56", "throughput": 5561.1, "total_tokens": 16332624} +{"current_steps": 33185, "total_steps": 37885, "loss": 0.0, "lr": 9.235121958868731e-08, "epoch": 4.379701728916458, "percentage": 87.59, "elapsed_time": "0:48:57", "remaining_time": "0:06:56", "throughput": 5561.27, "total_tokens": 16334928} +{"current_steps": 33190, "total_steps": 37885, "loss": 0.0004, "lr": 9.215794686280343e-08, "epoch": 4.380361620694206, "percentage": 87.61, "elapsed_time": "0:48:57", "remaining_time": "0:06:55", "throughput": 5561.53, "total_tokens": 16337552} +{"current_steps": 33195, "total_steps": 37885, "loss": 0.0, "lr": 9.196486681982096e-08, "epoch": 4.381021512471954, "percentage": 87.62, "elapsed_time": "0:48:57", "remaining_time": "0:06:55", "throughput": 5561.78, "total_tokens": 16340112} +{"current_steps": 33200, "total_steps": 37885, "loss": 0.0001, "lr": 9.177197950072012e-08, "epoch": 4.381681404249703, "percentage": 87.63, "elapsed_time": "0:48:58", "remaining_time": "0:06:54", "throughput": 5561.95, "total_tokens": 16342416} +{"current_steps": 33205, "total_steps": 37885, "loss": 0.0366, "lr": 9.157928494644007e-08, "epoch": 4.382341296027452, "percentage": 87.65, "elapsed_time": "0:48:58", "remaining_time": "0:06:54", "throughput": 5561.69, "total_tokens": 16344912} +{"current_steps": 33210, "total_steps": 37885, "loss": 0.0001, "lr": 9.138678319787818e-08, "epoch": 4.3830011878052, "percentage": 87.66, "elapsed_time": "0:48:59", "remaining_time": "0:06:53", "throughput": 5562.01, "total_tokens": 16347728} +{"current_steps": 33215, "total_steps": 37885, "loss": 0.0, "lr": 9.119447429589212e-08, "epoch": 4.383661079582948, "percentage": 87.67, "elapsed_time": "0:48:59", "remaining_time": "0:06:53", "throughput": 5562.28, "total_tokens": 16350352} +{"current_steps": 33220, "total_steps": 37885, "loss": 0.0, "lr": 9.100235828129743e-08, "epoch": 4.3843209713606965, "percentage": 87.69, "elapsed_time": "0:48:59", "remaining_time": "0:06:52", "throughput": 5562.49, "total_tokens": 16352784} +{"current_steps": 33225, "total_steps": 37885, "loss": 0.0066, "lr": 9.08104351948702e-08, "epoch": 4.384980863138446, "percentage": 87.7, "elapsed_time": "0:49:00", "remaining_time": "0:06:52", "throughput": 5562.73, "total_tokens": 16355344} +{"current_steps": 33230, "total_steps": 37885, "loss": 0.0003, "lr": 9.061870507734426e-08, "epoch": 4.385640754916194, "percentage": 87.71, "elapsed_time": "0:49:00", "remaining_time": "0:06:51", "throughput": 5562.92, "total_tokens": 16357712} +{"current_steps": 33235, "total_steps": 37885, "loss": 0.0, "lr": 9.042716796941275e-08, "epoch": 4.386300646693942, "percentage": 87.73, "elapsed_time": "0:49:00", "remaining_time": "0:06:51", "throughput": 5563.12, "total_tokens": 16360144} +{"current_steps": 33240, "total_steps": 37885, "loss": 0.0, "lr": 9.023582391172813e-08, "epoch": 4.3869605384716905, "percentage": 87.74, "elapsed_time": "0:49:01", "remaining_time": "0:06:50", "throughput": 5563.33, "total_tokens": 16362576} +{"current_steps": 33245, "total_steps": 37885, "loss": 0.0, "lr": 9.004467294490203e-08, "epoch": 4.387620430249439, "percentage": 87.75, "elapsed_time": "0:49:01", "remaining_time": "0:06:50", "throughput": 5563.55, "total_tokens": 16365072} +{"current_steps": 33250, "total_steps": 37885, "loss": 0.0, "lr": 8.98537151095048e-08, "epoch": 4.388280322027187, "percentage": 87.77, "elapsed_time": "0:49:01", "remaining_time": "0:06:50", "throughput": 5563.77, "total_tokens": 16367568} +{"current_steps": 33255, "total_steps": 37885, "loss": 0.0, "lr": 8.966295044606565e-08, "epoch": 4.388940213804936, "percentage": 87.78, "elapsed_time": "0:49:02", "remaining_time": "0:06:49", "throughput": 5564.02, "total_tokens": 16370128} +{"current_steps": 33260, "total_steps": 37885, "loss": 0.0, "lr": 8.94723789950731e-08, "epoch": 4.3896001055826845, "percentage": 87.79, "elapsed_time": "0:49:02", "remaining_time": "0:06:49", "throughput": 5564.26, "total_tokens": 16372688} +{"current_steps": 33265, "total_steps": 37885, "loss": 0.0, "lr": 8.928200079697479e-08, "epoch": 4.390259997360433, "percentage": 87.81, "elapsed_time": "0:49:02", "remaining_time": "0:06:48", "throughput": 5564.47, "total_tokens": 16375120} +{"current_steps": 33270, "total_steps": 37885, "loss": 0.0006, "lr": 8.909181589217674e-08, "epoch": 4.390919889138181, "percentage": 87.82, "elapsed_time": "0:49:03", "remaining_time": "0:06:48", "throughput": 5564.7, "total_tokens": 16377616} +{"current_steps": 33275, "total_steps": 37885, "loss": 0.0, "lr": 8.890182432104443e-08, "epoch": 4.391579780915929, "percentage": 87.83, "elapsed_time": "0:49:03", "remaining_time": "0:06:47", "throughput": 5564.93, "total_tokens": 16380112} +{"current_steps": 33280, "total_steps": 37885, "loss": 0.0, "lr": 8.871202612390249e-08, "epoch": 4.3922396726936785, "percentage": 87.84, "elapsed_time": "0:49:03", "remaining_time": "0:06:47", "throughput": 5565.13, "total_tokens": 16382544} +{"current_steps": 33285, "total_steps": 37885, "loss": 0.0, "lr": 8.852242134103383e-08, "epoch": 4.392899564471427, "percentage": 87.86, "elapsed_time": "0:49:04", "remaining_time": "0:06:46", "throughput": 5565.37, "total_tokens": 16385104} +{"current_steps": 33290, "total_steps": 37885, "loss": 0.0, "lr": 8.833301001268078e-08, "epoch": 4.393559456249175, "percentage": 87.87, "elapsed_time": "0:49:04", "remaining_time": "0:06:46", "throughput": 5565.58, "total_tokens": 16387536} +{"current_steps": 33295, "total_steps": 37885, "loss": 0.0188, "lr": 8.814379217904455e-08, "epoch": 4.394219348026923, "percentage": 87.88, "elapsed_time": "0:49:04", "remaining_time": "0:06:45", "throughput": 5565.75, "total_tokens": 16389840} +{"current_steps": 33300, "total_steps": 37885, "loss": 0.0153, "lr": 8.795476788028555e-08, "epoch": 4.394879239804672, "percentage": 87.9, "elapsed_time": "0:49:05", "remaining_time": "0:06:45", "throughput": 5565.9, "total_tokens": 16392080} +{"current_steps": 33305, "total_steps": 37885, "loss": 0.0001, "lr": 8.776593715652226e-08, "epoch": 4.395539131582421, "percentage": 87.91, "elapsed_time": "0:49:05", "remaining_time": "0:06:45", "throughput": 5566.06, "total_tokens": 16394384} +{"current_steps": 33310, "total_steps": 37885, "loss": 0.002, "lr": 8.757730004783303e-08, "epoch": 4.396199023360169, "percentage": 87.92, "elapsed_time": "0:49:05", "remaining_time": "0:06:44", "throughput": 5566.35, "total_tokens": 16397072} +{"current_steps": 33315, "total_steps": 37885, "loss": 0.0626, "lr": 8.738885659425477e-08, "epoch": 4.396858915137917, "percentage": 87.94, "elapsed_time": "0:49:06", "remaining_time": "0:06:44", "throughput": 5566.61, "total_tokens": 16399696} +{"current_steps": 33320, "total_steps": 37885, "loss": 0.0, "lr": 8.72006068357829e-08, "epoch": 4.397518806915666, "percentage": 87.95, "elapsed_time": "0:49:06", "remaining_time": "0:06:43", "throughput": 5566.86, "total_tokens": 16402256} +{"current_steps": 33325, "total_steps": 37885, "loss": 0.0001, "lr": 8.701255081237225e-08, "epoch": 4.398178698693414, "percentage": 87.96, "elapsed_time": "0:49:06", "remaining_time": "0:06:43", "throughput": 5567.14, "total_tokens": 16404944} +{"current_steps": 33330, "total_steps": 37885, "loss": 0.0, "lr": 8.682468856393654e-08, "epoch": 4.398838590471163, "percentage": 87.98, "elapsed_time": "0:49:07", "remaining_time": "0:06:42", "throughput": 5567.31, "total_tokens": 16407248} +{"current_steps": 33335, "total_steps": 37885, "loss": 0.0, "lr": 8.66370201303478e-08, "epoch": 4.399498482248911, "percentage": 87.99, "elapsed_time": "0:49:07", "remaining_time": "0:06:42", "throughput": 5567.43, "total_tokens": 16409424} +{"current_steps": 33340, "total_steps": 37885, "loss": 0.0004, "lr": 8.644954555143757e-08, "epoch": 4.40015837402666, "percentage": 88.0, "elapsed_time": "0:49:07", "remaining_time": "0:06:41", "throughput": 5567.7, "total_tokens": 16412048} +{"current_steps": 33345, "total_steps": 37885, "loss": 0.0002, "lr": 8.626226486699573e-08, "epoch": 4.400818265804408, "percentage": 88.02, "elapsed_time": "0:49:08", "remaining_time": "0:06:41", "throughput": 5567.98, "total_tokens": 16414736} +{"current_steps": 33350, "total_steps": 37885, "loss": 0.0, "lr": 8.607517811677168e-08, "epoch": 4.401478157582156, "percentage": 88.03, "elapsed_time": "0:49:08", "remaining_time": "0:06:40", "throughput": 5568.21, "total_tokens": 16417232} +{"current_steps": 33355, "total_steps": 37885, "loss": 0.0188, "lr": 8.588828534047276e-08, "epoch": 4.402138049359905, "percentage": 88.04, "elapsed_time": "0:49:08", "remaining_time": "0:06:40", "throughput": 5568.43, "total_tokens": 16419728} +{"current_steps": 33360, "total_steps": 37885, "loss": 0.0, "lr": 8.570158657776582e-08, "epoch": 4.402797941137654, "percentage": 88.06, "elapsed_time": "0:49:09", "remaining_time": "0:06:40", "throughput": 5568.67, "total_tokens": 16422288} +{"current_steps": 33365, "total_steps": 37885, "loss": 0.0782, "lr": 8.551508186827639e-08, "epoch": 4.403457832915402, "percentage": 88.07, "elapsed_time": "0:49:09", "remaining_time": "0:06:39", "throughput": 5568.89, "total_tokens": 16424784} +{"current_steps": 33370, "total_steps": 37885, "loss": 0.0, "lr": 8.532877125158854e-08, "epoch": 4.40411772469315, "percentage": 88.08, "elapsed_time": "0:49:09", "remaining_time": "0:06:39", "throughput": 5569.11, "total_tokens": 16427280} +{"current_steps": 33375, "total_steps": 37885, "loss": 0.0366, "lr": 8.514265476724547e-08, "epoch": 4.4047776164708985, "percentage": 88.1, "elapsed_time": "0:49:10", "remaining_time": "0:06:38", "throughput": 5569.35, "total_tokens": 16429840} +{"current_steps": 33380, "total_steps": 37885, "loss": 0.0, "lr": 8.49567324547491e-08, "epoch": 4.405437508248648, "percentage": 88.11, "elapsed_time": "0:49:10", "remaining_time": "0:06:38", "throughput": 5569.54, "total_tokens": 16432208} +{"current_steps": 33385, "total_steps": 37885, "loss": 0.0001, "lr": 8.47710043535601e-08, "epoch": 4.406097400026396, "percentage": 88.12, "elapsed_time": "0:49:10", "remaining_time": "0:06:37", "throughput": 5569.84, "total_tokens": 16434960} +{"current_steps": 33390, "total_steps": 37885, "loss": 0.0, "lr": 8.458547050309794e-08, "epoch": 4.406757291804144, "percentage": 88.14, "elapsed_time": "0:49:11", "remaining_time": "0:06:37", "throughput": 5570.1, "total_tokens": 16437584} +{"current_steps": 33395, "total_steps": 37885, "loss": 0.0, "lr": 8.440013094274035e-08, "epoch": 4.4074171835818925, "percentage": 88.15, "elapsed_time": "0:49:11", "remaining_time": "0:06:36", "throughput": 5570.35, "total_tokens": 16440144} +{"current_steps": 33400, "total_steps": 37885, "loss": 0.0004, "lr": 8.421498571182517e-08, "epoch": 4.408077075359641, "percentage": 88.16, "elapsed_time": "0:49:11", "remaining_time": "0:06:36", "throughput": 5570.59, "total_tokens": 16442704} +{"current_steps": 33405, "total_steps": 37885, "loss": 0.0, "lr": 8.403003484964743e-08, "epoch": 4.40873696713739, "percentage": 88.17, "elapsed_time": "0:49:12", "remaining_time": "0:06:35", "throughput": 5570.75, "total_tokens": 16445008} +{"current_steps": 33410, "total_steps": 37885, "loss": 0.0, "lr": 8.384527839546196e-08, "epoch": 4.409396858915138, "percentage": 88.19, "elapsed_time": "0:49:12", "remaining_time": "0:06:35", "throughput": 5570.9, "total_tokens": 16447248} +{"current_steps": 33415, "total_steps": 37885, "loss": 0.0, "lr": 8.366071638848183e-08, "epoch": 4.4100567506928865, "percentage": 88.2, "elapsed_time": "0:49:12", "remaining_time": "0:06:34", "throughput": 5571.23, "total_tokens": 16450128} +{"current_steps": 33420, "total_steps": 37885, "loss": 0.028, "lr": 8.347634886787901e-08, "epoch": 4.410716642470635, "percentage": 88.21, "elapsed_time": "0:49:13", "remaining_time": "0:06:34", "throughput": 5571.49, "total_tokens": 16452752} +{"current_steps": 33425, "total_steps": 37885, "loss": 0.0, "lr": 8.329217587278437e-08, "epoch": 4.411376534248383, "percentage": 88.23, "elapsed_time": "0:49:13", "remaining_time": "0:06:34", "throughput": 5571.71, "total_tokens": 16455248} +{"current_steps": 33430, "total_steps": 37885, "loss": 0.0, "lr": 8.310819744228691e-08, "epoch": 4.412036426026131, "percentage": 88.24, "elapsed_time": "0:49:13", "remaining_time": "0:06:33", "throughput": 5571.9, "total_tokens": 16457616} +{"current_steps": 33435, "total_steps": 37885, "loss": 0.0001, "lr": 8.29244136154349e-08, "epoch": 4.4126963178038805, "percentage": 88.25, "elapsed_time": "0:49:14", "remaining_time": "0:06:33", "throughput": 5572.08, "total_tokens": 16459984} +{"current_steps": 33440, "total_steps": 37885, "loss": 0.0, "lr": 8.274082443123543e-08, "epoch": 4.413356209581629, "percentage": 88.27, "elapsed_time": "0:49:14", "remaining_time": "0:06:32", "throughput": 5572.3, "total_tokens": 16462480} +{"current_steps": 33445, "total_steps": 37885, "loss": 0.0308, "lr": 8.255742992865356e-08, "epoch": 4.414016101359377, "percentage": 88.28, "elapsed_time": "0:49:14", "remaining_time": "0:06:32", "throughput": 5572.55, "total_tokens": 16465040} +{"current_steps": 33450, "total_steps": 37885, "loss": 0.0, "lr": 8.237423014661348e-08, "epoch": 4.414675993137125, "percentage": 88.29, "elapsed_time": "0:49:15", "remaining_time": "0:06:31", "throughput": 5572.83, "total_tokens": 16467728} +{"current_steps": 33455, "total_steps": 37885, "loss": 0.0, "lr": 8.219122512399813e-08, "epoch": 4.415335884914874, "percentage": 88.31, "elapsed_time": "0:49:15", "remaining_time": "0:06:31", "throughput": 5572.97, "total_tokens": 16469968} +{"current_steps": 33460, "total_steps": 37885, "loss": 0.0002, "lr": 8.200841489964927e-08, "epoch": 4.415995776692623, "percentage": 88.32, "elapsed_time": "0:49:15", "remaining_time": "0:06:30", "throughput": 5573.23, "total_tokens": 16472592} +{"current_steps": 33465, "total_steps": 37885, "loss": 0.0, "lr": 8.182579951236657e-08, "epoch": 4.416655668470371, "percentage": 88.33, "elapsed_time": "0:49:15", "remaining_time": "0:06:30", "throughput": 5573.43, "total_tokens": 16475024} +{"current_steps": 33470, "total_steps": 37885, "loss": 0.0, "lr": 8.164337900090901e-08, "epoch": 4.417315560248119, "percentage": 88.35, "elapsed_time": "0:49:16", "remaining_time": "0:06:29", "throughput": 5573.65, "total_tokens": 16477520} +{"current_steps": 33475, "total_steps": 37885, "loss": 0.0003, "lr": 8.146115340399418e-08, "epoch": 4.417975452025868, "percentage": 88.36, "elapsed_time": "0:49:16", "remaining_time": "0:06:29", "throughput": 5573.88, "total_tokens": 16480016} +{"current_steps": 33480, "total_steps": 37885, "loss": 0.0, "lr": 8.127912276029781e-08, "epoch": 4.418635343803616, "percentage": 88.37, "elapsed_time": "0:49:16", "remaining_time": "0:06:29", "throughput": 5574.02, "total_tokens": 16482256} +{"current_steps": 33485, "total_steps": 37885, "loss": 0.0, "lr": 8.109728710845488e-08, "epoch": 4.419295235581365, "percentage": 88.39, "elapsed_time": "0:49:17", "remaining_time": "0:06:28", "throughput": 5574.16, "total_tokens": 16484496} +{"current_steps": 33490, "total_steps": 37885, "loss": 0.0, "lr": 8.091564648705874e-08, "epoch": 4.419955127359113, "percentage": 88.4, "elapsed_time": "0:49:17", "remaining_time": "0:06:28", "throughput": 5574.34, "total_tokens": 16486864} +{"current_steps": 33495, "total_steps": 37885, "loss": 0.0006, "lr": 8.073420093466087e-08, "epoch": 4.420615019136862, "percentage": 88.41, "elapsed_time": "0:49:17", "remaining_time": "0:06:27", "throughput": 5574.51, "total_tokens": 16489168} +{"current_steps": 33500, "total_steps": 37885, "loss": 0.0426, "lr": 8.055295048977218e-08, "epoch": 4.42127491091461, "percentage": 88.43, "elapsed_time": "0:49:18", "remaining_time": "0:06:27", "throughput": 5574.77, "total_tokens": 16491792} +{"current_steps": 33505, "total_steps": 37885, "loss": 0.0472, "lr": 8.037189519086163e-08, "epoch": 4.421934802692358, "percentage": 88.44, "elapsed_time": "0:49:18", "remaining_time": "0:06:26", "throughput": 5574.93, "total_tokens": 16494096} +{"current_steps": 33510, "total_steps": 37885, "loss": 0.0001, "lr": 8.019103507635704e-08, "epoch": 4.4225946944701064, "percentage": 88.45, "elapsed_time": "0:49:18", "remaining_time": "0:06:26", "throughput": 5575.2, "total_tokens": 16496720} +{"current_steps": 33515, "total_steps": 37885, "loss": 0.0, "lr": 8.00103701846443e-08, "epoch": 4.423254586247856, "percentage": 88.47, "elapsed_time": "0:49:19", "remaining_time": "0:06:25", "throughput": 5575.4, "total_tokens": 16499152} +{"current_steps": 33520, "total_steps": 37885, "loss": 0.0025, "lr": 7.982990055406846e-08, "epoch": 4.423914478025604, "percentage": 88.48, "elapsed_time": "0:49:19", "remaining_time": "0:06:25", "throughput": 5575.58, "total_tokens": 16501520} +{"current_steps": 33525, "total_steps": 37885, "loss": 0.0001, "lr": 7.964962622293314e-08, "epoch": 4.424574369803352, "percentage": 88.49, "elapsed_time": "0:49:19", "remaining_time": "0:06:24", "throughput": 5575.74, "total_tokens": 16503824} +{"current_steps": 33530, "total_steps": 37885, "loss": 0.0, "lr": 7.946954722949972e-08, "epoch": 4.4252342615811004, "percentage": 88.5, "elapsed_time": "0:49:20", "remaining_time": "0:06:24", "throughput": 5576.03, "total_tokens": 16506512} +{"current_steps": 33535, "total_steps": 37885, "loss": 0.0, "lr": 7.928966361198897e-08, "epoch": 4.425894153358849, "percentage": 88.52, "elapsed_time": "0:49:20", "remaining_time": "0:06:24", "throughput": 5576.2, "total_tokens": 16508880} +{"current_steps": 33540, "total_steps": 37885, "loss": 0.0001, "lr": 7.910997540858011e-08, "epoch": 4.426554045136598, "percentage": 88.53, "elapsed_time": "0:49:20", "remaining_time": "0:06:23", "throughput": 5576.35, "total_tokens": 16511120} +{"current_steps": 33545, "total_steps": 37885, "loss": 0.0, "lr": 7.89304826574102e-08, "epoch": 4.427213936914346, "percentage": 88.54, "elapsed_time": "0:49:21", "remaining_time": "0:06:23", "throughput": 5576.51, "total_tokens": 16513424} +{"current_steps": 33550, "total_steps": 37885, "loss": 0.0, "lr": 7.875118539657566e-08, "epoch": 4.4278738286920944, "percentage": 88.56, "elapsed_time": "0:49:21", "remaining_time": "0:06:22", "throughput": 5576.65, "total_tokens": 16515664} +{"current_steps": 33555, "total_steps": 37885, "loss": 0.0, "lr": 7.857208366413048e-08, "epoch": 4.428533720469843, "percentage": 88.57, "elapsed_time": "0:49:21", "remaining_time": "0:06:22", "throughput": 5576.88, "total_tokens": 16518224} +{"current_steps": 33560, "total_steps": 37885, "loss": 0.0006, "lr": 7.839317749808838e-08, "epoch": 4.429193612247591, "percentage": 88.58, "elapsed_time": "0:49:22", "remaining_time": "0:06:21", "throughput": 5577.05, "total_tokens": 16520528} +{"current_steps": 33565, "total_steps": 37885, "loss": 0.0, "lr": 7.821446693642064e-08, "epoch": 4.42985350402534, "percentage": 88.6, "elapsed_time": "0:49:22", "remaining_time": "0:06:21", "throughput": 5577.23, "total_tokens": 16522896} +{"current_steps": 33570, "total_steps": 37885, "loss": 0.0, "lr": 7.803595201705692e-08, "epoch": 4.4305133958030885, "percentage": 88.61, "elapsed_time": "0:49:22", "remaining_time": "0:06:20", "throughput": 5577.46, "total_tokens": 16525392} +{"current_steps": 33575, "total_steps": 37885, "loss": 0.0001, "lr": 7.785763277788648e-08, "epoch": 4.431173287580837, "percentage": 88.62, "elapsed_time": "0:49:23", "remaining_time": "0:06:20", "throughput": 5577.69, "total_tokens": 16527952} +{"current_steps": 33580, "total_steps": 37885, "loss": 0.0, "lr": 7.767950925675559e-08, "epoch": 4.431833179358585, "percentage": 88.64, "elapsed_time": "0:49:23", "remaining_time": "0:06:19", "throughput": 5577.9, "total_tokens": 16530384} +{"current_steps": 33585, "total_steps": 37885, "loss": 0.0, "lr": 7.750158149147012e-08, "epoch": 4.432493071136333, "percentage": 88.65, "elapsed_time": "0:49:23", "remaining_time": "0:06:19", "throughput": 5578.08, "total_tokens": 16532752} +{"current_steps": 33590, "total_steps": 37885, "loss": 0.0, "lr": 7.732384951979354e-08, "epoch": 4.4331529629140825, "percentage": 88.66, "elapsed_time": "0:49:24", "remaining_time": "0:06:19", "throughput": 5578.31, "total_tokens": 16535248} +{"current_steps": 33595, "total_steps": 37885, "loss": 0.0, "lr": 7.714631337944854e-08, "epoch": 4.433812854691831, "percentage": 88.68, "elapsed_time": "0:49:24", "remaining_time": "0:06:18", "throughput": 5578.51, "total_tokens": 16537680} +{"current_steps": 33600, "total_steps": 37885, "loss": 0.0, "lr": 7.696897310811579e-08, "epoch": 4.434472746469579, "percentage": 88.69, "elapsed_time": "0:49:24", "remaining_time": "0:06:18", "throughput": 5578.77, "total_tokens": 16540304} +{"current_steps": 33605, "total_steps": 37885, "loss": 0.092, "lr": 7.679182874343437e-08, "epoch": 4.435132638247327, "percentage": 88.7, "elapsed_time": "0:49:25", "remaining_time": "0:06:17", "throughput": 5579.06, "total_tokens": 16542992} +{"current_steps": 33610, "total_steps": 37885, "loss": 0.0072, "lr": 7.66148803230019e-08, "epoch": 4.435792530025076, "percentage": 88.72, "elapsed_time": "0:49:25", "remaining_time": "0:06:17", "throughput": 5579.32, "total_tokens": 16545616} +{"current_steps": 33615, "total_steps": 37885, "loss": 0.0002, "lr": 7.643812788437454e-08, "epoch": 4.436452421802825, "percentage": 88.73, "elapsed_time": "0:49:25", "remaining_time": "0:06:16", "throughput": 5579.52, "total_tokens": 16548048} +{"current_steps": 33620, "total_steps": 37885, "loss": 0.0, "lr": 7.626157146506651e-08, "epoch": 4.437112313580573, "percentage": 88.74, "elapsed_time": "0:49:26", "remaining_time": "0:06:16", "throughput": 5579.67, "total_tokens": 16550288} +{"current_steps": 33625, "total_steps": 37885, "loss": 0.008, "lr": 7.608521110255084e-08, "epoch": 4.437772205358321, "percentage": 88.76, "elapsed_time": "0:49:26", "remaining_time": "0:06:15", "throughput": 5579.88, "total_tokens": 16552720} +{"current_steps": 33630, "total_steps": 37885, "loss": 0.0, "lr": 7.590904683425858e-08, "epoch": 4.43843209713607, "percentage": 88.77, "elapsed_time": "0:49:26", "remaining_time": "0:06:15", "throughput": 5580.04, "total_tokens": 16555024} +{"current_steps": 33635, "total_steps": 37885, "loss": 0.0, "lr": 7.57330786975795e-08, "epoch": 4.439091988913818, "percentage": 88.78, "elapsed_time": "0:49:27", "remaining_time": "0:06:14", "throughput": 5580.26, "total_tokens": 16557520} +{"current_steps": 33640, "total_steps": 37885, "loss": 0.0, "lr": 7.555730672986138e-08, "epoch": 4.439751880691567, "percentage": 88.8, "elapsed_time": "0:49:27", "remaining_time": "0:06:14", "throughput": 5580.43, "total_tokens": 16559824} +{"current_steps": 33645, "total_steps": 37885, "loss": 0.0, "lr": 7.53817309684106e-08, "epoch": 4.440411772469315, "percentage": 88.81, "elapsed_time": "0:49:27", "remaining_time": "0:06:14", "throughput": 5580.62, "total_tokens": 16562256} +{"current_steps": 33650, "total_steps": 37885, "loss": 0.0001, "lr": 7.520635145049193e-08, "epoch": 4.441071664247064, "percentage": 88.82, "elapsed_time": "0:49:28", "remaining_time": "0:06:13", "throughput": 5580.83, "total_tokens": 16564688} +{"current_steps": 33655, "total_steps": 37885, "loss": 0.028, "lr": 7.503116821332834e-08, "epoch": 4.441731556024812, "percentage": 88.83, "elapsed_time": "0:49:28", "remaining_time": "0:06:13", "throughput": 5580.97, "total_tokens": 16566928} +{"current_steps": 33660, "total_steps": 37885, "loss": 0.0, "lr": 7.485618129410109e-08, "epoch": 4.44239144780256, "percentage": 88.85, "elapsed_time": "0:49:28", "remaining_time": "0:06:12", "throughput": 5581.15, "total_tokens": 16569296} +{"current_steps": 33665, "total_steps": 37885, "loss": 0.0, "lr": 7.468139072994994e-08, "epoch": 4.443051339580309, "percentage": 88.86, "elapsed_time": "0:49:29", "remaining_time": "0:06:12", "throughput": 5581.35, "total_tokens": 16571728} +{"current_steps": 33670, "total_steps": 37885, "loss": 0.0015, "lr": 7.450679655797321e-08, "epoch": 4.443711231358058, "percentage": 88.87, "elapsed_time": "0:49:29", "remaining_time": "0:06:11", "throughput": 5581.55, "total_tokens": 16574160} +{"current_steps": 33675, "total_steps": 37885, "loss": 0.0018, "lr": 7.433239881522691e-08, "epoch": 4.444371123135806, "percentage": 88.89, "elapsed_time": "0:49:29", "remaining_time": "0:06:11", "throughput": 5581.67, "total_tokens": 16576336} +{"current_steps": 33680, "total_steps": 37885, "loss": 0.0001, "lr": 7.415819753872576e-08, "epoch": 4.445031014913554, "percentage": 88.9, "elapsed_time": "0:49:30", "remaining_time": "0:06:10", "throughput": 5581.88, "total_tokens": 16578768} +{"current_steps": 33685, "total_steps": 37885, "loss": 0.0, "lr": 7.398419276544287e-08, "epoch": 4.445690906691302, "percentage": 88.91, "elapsed_time": "0:49:30", "remaining_time": "0:06:10", "throughput": 5582.06, "total_tokens": 16581136} +{"current_steps": 33690, "total_steps": 37885, "loss": 0.0049, "lr": 7.381038453230925e-08, "epoch": 4.446350798469051, "percentage": 88.93, "elapsed_time": "0:49:30", "remaining_time": "0:06:09", "throughput": 5582.26, "total_tokens": 16583568} +{"current_steps": 33695, "total_steps": 37885, "loss": 0.028, "lr": 7.363677287621462e-08, "epoch": 4.4470106902468, "percentage": 88.94, "elapsed_time": "0:49:31", "remaining_time": "0:06:09", "throughput": 5582.47, "total_tokens": 16586000} +{"current_steps": 33700, "total_steps": 37885, "loss": 0.0, "lr": 7.346335783400693e-08, "epoch": 4.447670582024548, "percentage": 88.95, "elapsed_time": "0:49:31", "remaining_time": "0:06:09", "throughput": 5582.65, "total_tokens": 16588368} +{"current_steps": 33705, "total_steps": 37885, "loss": 0.0, "lr": 7.329013944249186e-08, "epoch": 4.448330473802296, "percentage": 88.97, "elapsed_time": "0:49:31", "remaining_time": "0:06:08", "throughput": 5582.83, "total_tokens": 16590736} +{"current_steps": 33710, "total_steps": 37885, "loss": 0.0, "lr": 7.311711773843399e-08, "epoch": 4.448990365580045, "percentage": 88.98, "elapsed_time": "0:49:32", "remaining_time": "0:06:08", "throughput": 5583.04, "total_tokens": 16593168} +{"current_steps": 33715, "total_steps": 37885, "loss": 0.0294, "lr": 7.294429275855596e-08, "epoch": 4.449650257357793, "percentage": 88.99, "elapsed_time": "0:49:32", "remaining_time": "0:06:07", "throughput": 5583.2, "total_tokens": 16595472} +{"current_steps": 33720, "total_steps": 37885, "loss": 0.0, "lr": 7.277166453953865e-08, "epoch": 4.450310149135542, "percentage": 89.01, "elapsed_time": "0:49:32", "remaining_time": "0:06:07", "throughput": 5583.3, "total_tokens": 16597584} +{"current_steps": 33725, "total_steps": 37885, "loss": 0.0005, "lr": 7.259923311802119e-08, "epoch": 4.45097004091329, "percentage": 89.02, "elapsed_time": "0:49:33", "remaining_time": "0:06:06", "throughput": 5583.53, "total_tokens": 16600080} +{"current_steps": 33730, "total_steps": 37885, "loss": 0.0, "lr": 7.242699853060041e-08, "epoch": 4.451629932691039, "percentage": 89.03, "elapsed_time": "0:49:33", "remaining_time": "0:06:06", "throughput": 5583.75, "total_tokens": 16602576} +{"current_steps": 33735, "total_steps": 37885, "loss": 0.0001, "lr": 7.225496081383264e-08, "epoch": 4.452289824468787, "percentage": 89.05, "elapsed_time": "0:49:33", "remaining_time": "0:06:05", "throughput": 5584.01, "total_tokens": 16605200} +{"current_steps": 33740, "total_steps": 37885, "loss": 0.0003, "lr": 7.2083120004231e-08, "epoch": 4.452949716246535, "percentage": 89.06, "elapsed_time": "0:49:34", "remaining_time": "0:06:05", "throughput": 5584.19, "total_tokens": 16607568} +{"current_steps": 33745, "total_steps": 37885, "loss": 0.0, "lr": 7.191147613826787e-08, "epoch": 4.453609608024284, "percentage": 89.07, "elapsed_time": "0:49:34", "remaining_time": "0:06:04", "throughput": 5584.53, "total_tokens": 16610448} +{"current_steps": 33750, "total_steps": 37885, "loss": 0.0001, "lr": 7.17400292523731e-08, "epoch": 4.454269499802033, "percentage": 89.09, "elapsed_time": "0:49:34", "remaining_time": "0:06:04", "throughput": 5584.81, "total_tokens": 16613136} +{"current_steps": 33755, "total_steps": 37885, "loss": 0.0, "lr": 7.156877938293515e-08, "epoch": 4.454929391579781, "percentage": 89.1, "elapsed_time": "0:49:35", "remaining_time": "0:06:04", "throughput": 5585.03, "total_tokens": 16615632} +{"current_steps": 33760, "total_steps": 37885, "loss": 0.0, "lr": 7.139772656630083e-08, "epoch": 4.455589283357529, "percentage": 89.11, "elapsed_time": "0:49:35", "remaining_time": "0:06:03", "throughput": 5585.27, "total_tokens": 16618192} +{"current_steps": 33765, "total_steps": 37885, "loss": 0.0, "lr": 7.122687083877422e-08, "epoch": 4.4562491751352775, "percentage": 89.12, "elapsed_time": "0:49:35", "remaining_time": "0:06:03", "throughput": 5585.43, "total_tokens": 16620496} +{"current_steps": 33770, "total_steps": 37885, "loss": 0.0381, "lr": 7.105621223661906e-08, "epoch": 4.456909066913026, "percentage": 89.14, "elapsed_time": "0:49:36", "remaining_time": "0:06:02", "throughput": 5585.61, "total_tokens": 16622864} +{"current_steps": 33775, "total_steps": 37885, "loss": 0.0, "lr": 7.088575079605585e-08, "epoch": 4.457568958690775, "percentage": 89.15, "elapsed_time": "0:49:36", "remaining_time": "0:06:02", "throughput": 5585.82, "total_tokens": 16625360} +{"current_steps": 33780, "total_steps": 37885, "loss": 0.0, "lr": 7.071548655326387e-08, "epoch": 4.458228850468523, "percentage": 89.16, "elapsed_time": "0:49:36", "remaining_time": "0:06:01", "throughput": 5586.05, "total_tokens": 16627856} +{"current_steps": 33785, "total_steps": 37885, "loss": 0.0, "lr": 7.054541954438053e-08, "epoch": 4.4588887422462715, "percentage": 89.18, "elapsed_time": "0:49:37", "remaining_time": "0:06:01", "throughput": 5586.33, "total_tokens": 16630544} +{"current_steps": 33790, "total_steps": 37885, "loss": 0.0, "lr": 7.03755498055012e-08, "epoch": 4.45954863402402, "percentage": 89.19, "elapsed_time": "0:49:37", "remaining_time": "0:06:00", "throughput": 5586.57, "total_tokens": 16633104} +{"current_steps": 33795, "total_steps": 37885, "loss": 0.0386, "lr": 7.02058773726798e-08, "epoch": 4.460208525801768, "percentage": 89.2, "elapsed_time": "0:49:37", "remaining_time": "0:06:00", "throughput": 5586.83, "total_tokens": 16635728} +{"current_steps": 33800, "total_steps": 37885, "loss": 0.0, "lr": 7.003640228192775e-08, "epoch": 4.460868417579517, "percentage": 89.22, "elapsed_time": "0:49:37", "remaining_time": "0:05:59", "throughput": 5586.95, "total_tokens": 16637904} +{"current_steps": 33805, "total_steps": 37885, "loss": 0.0, "lr": 6.986712456921506e-08, "epoch": 4.4615283093572655, "percentage": 89.23, "elapsed_time": "0:49:38", "remaining_time": "0:05:59", "throughput": 5587.12, "total_tokens": 16640208} +{"current_steps": 33810, "total_steps": 37885, "loss": 0.0, "lr": 6.969804427046988e-08, "epoch": 4.462188201135014, "percentage": 89.24, "elapsed_time": "0:49:38", "remaining_time": "0:05:59", "throughput": 5587.32, "total_tokens": 16642640} +{"current_steps": 33815, "total_steps": 37885, "loss": 0.0239, "lr": 6.952916142157783e-08, "epoch": 4.462848092912762, "percentage": 89.26, "elapsed_time": "0:49:38", "remaining_time": "0:05:58", "throughput": 5587.55, "total_tokens": 16645136} +{"current_steps": 33820, "total_steps": 37885, "loss": 0.0, "lr": 6.936047605838347e-08, "epoch": 4.46350798469051, "percentage": 89.27, "elapsed_time": "0:49:39", "remaining_time": "0:05:58", "throughput": 5587.69, "total_tokens": 16647376} +{"current_steps": 33825, "total_steps": 37885, "loss": 0.0, "lr": 6.919198821668892e-08, "epoch": 4.4641678764682595, "percentage": 89.28, "elapsed_time": "0:49:39", "remaining_time": "0:05:57", "throughput": 5587.83, "total_tokens": 16649616} +{"current_steps": 33830, "total_steps": 37885, "loss": 0.0, "lr": 6.902369793225437e-08, "epoch": 4.464827768246008, "percentage": 89.3, "elapsed_time": "0:49:39", "remaining_time": "0:05:57", "throughput": 5588.04, "total_tokens": 16652048} +{"current_steps": 33835, "total_steps": 37885, "loss": 0.0, "lr": 6.885560524079837e-08, "epoch": 4.465487660023756, "percentage": 89.31, "elapsed_time": "0:49:40", "remaining_time": "0:05:56", "throughput": 5588.26, "total_tokens": 16654544} +{"current_steps": 33840, "total_steps": 37885, "loss": 0.001, "lr": 6.868771017799735e-08, "epoch": 4.466147551801504, "percentage": 89.32, "elapsed_time": "0:49:40", "remaining_time": "0:05:56", "throughput": 5588.5, "total_tokens": 16657104} +{"current_steps": 33845, "total_steps": 37885, "loss": 0.0366, "lr": 6.852001277948593e-08, "epoch": 4.466807443579253, "percentage": 89.34, "elapsed_time": "0:49:40", "remaining_time": "0:05:55", "throughput": 5588.71, "total_tokens": 16659600} +{"current_steps": 33850, "total_steps": 37885, "loss": 0.0, "lr": 6.835251308085644e-08, "epoch": 4.467467335357002, "percentage": 89.35, "elapsed_time": "0:49:41", "remaining_time": "0:05:55", "throughput": 5589.01, "total_tokens": 16662352} +{"current_steps": 33855, "total_steps": 37885, "loss": 0.0, "lr": 6.818521111765952e-08, "epoch": 4.46812722713475, "percentage": 89.36, "elapsed_time": "0:49:41", "remaining_time": "0:05:54", "throughput": 5589.15, "total_tokens": 16664592} +{"current_steps": 33860, "total_steps": 37885, "loss": 0.0, "lr": 6.801810692540411e-08, "epoch": 4.468787118912498, "percentage": 89.38, "elapsed_time": "0:49:41", "remaining_time": "0:05:54", "throughput": 5589.4, "total_tokens": 16667216} +{"current_steps": 33865, "total_steps": 37885, "loss": 0.0001, "lr": 6.78512005395564e-08, "epoch": 4.469447010690247, "percentage": 89.39, "elapsed_time": "0:49:42", "remaining_time": "0:05:54", "throughput": 5589.64, "total_tokens": 16669776} +{"current_steps": 33870, "total_steps": 37885, "loss": 0.0, "lr": 6.768449199554127e-08, "epoch": 4.470106902467995, "percentage": 89.4, "elapsed_time": "0:49:42", "remaining_time": "0:05:53", "throughput": 5589.84, "total_tokens": 16672208} +{"current_steps": 33875, "total_steps": 37885, "loss": 0.0, "lr": 6.751798132874154e-08, "epoch": 4.470766794245744, "percentage": 89.42, "elapsed_time": "0:49:42", "remaining_time": "0:05:53", "throughput": 5590.0, "total_tokens": 16674512} +{"current_steps": 33880, "total_steps": 37885, "loss": 0.0002, "lr": 6.73516685744977e-08, "epoch": 4.471426686023492, "percentage": 89.43, "elapsed_time": "0:49:43", "remaining_time": "0:05:52", "throughput": 5590.16, "total_tokens": 16676816} +{"current_steps": 33885, "total_steps": 37885, "loss": 0.0192, "lr": 6.718555376810864e-08, "epoch": 4.472086577801241, "percentage": 89.44, "elapsed_time": "0:49:43", "remaining_time": "0:05:52", "throughput": 5590.4, "total_tokens": 16679376} +{"current_steps": 33890, "total_steps": 37885, "loss": 0.0, "lr": 6.70196369448306e-08, "epoch": 4.472746469578989, "percentage": 89.45, "elapsed_time": "0:49:43", "remaining_time": "0:05:51", "throughput": 5590.61, "total_tokens": 16681808} +{"current_steps": 33895, "total_steps": 37885, "loss": 0.0, "lr": 6.685391813987873e-08, "epoch": 4.473406361356737, "percentage": 89.47, "elapsed_time": "0:49:44", "remaining_time": "0:05:51", "throughput": 5590.9, "total_tokens": 16684560} +{"current_steps": 33900, "total_steps": 37885, "loss": 0.0239, "lr": 6.668839738842547e-08, "epoch": 4.474066253134486, "percentage": 89.48, "elapsed_time": "0:49:44", "remaining_time": "0:05:50", "throughput": 5591.12, "total_tokens": 16687056} +{"current_steps": 33905, "total_steps": 37885, "loss": 0.0666, "lr": 6.652307472560103e-08, "epoch": 4.474726144912235, "percentage": 89.49, "elapsed_time": "0:49:44", "remaining_time": "0:05:50", "throughput": 5591.3, "total_tokens": 16689424} +{"current_steps": 33910, "total_steps": 37885, "loss": 0.0, "lr": 6.635795018649459e-08, "epoch": 4.475386036689983, "percentage": 89.51, "elapsed_time": "0:49:45", "remaining_time": "0:05:49", "throughput": 5591.5, "total_tokens": 16691856} +{"current_steps": 33915, "total_steps": 37885, "loss": 0.0266, "lr": 6.61930238061521e-08, "epoch": 4.476045928467731, "percentage": 89.52, "elapsed_time": "0:49:45", "remaining_time": "0:05:49", "throughput": 5591.69, "total_tokens": 16694288} +{"current_steps": 33920, "total_steps": 37885, "loss": 0.0395, "lr": 6.602829561957846e-08, "epoch": 4.4767058202454795, "percentage": 89.53, "elapsed_time": "0:49:45", "remaining_time": "0:05:49", "throughput": 5591.96, "total_tokens": 16696976} +{"current_steps": 33925, "total_steps": 37885, "loss": 0.0, "lr": 6.586376566173556e-08, "epoch": 4.477365712023229, "percentage": 89.55, "elapsed_time": "0:49:46", "remaining_time": "0:05:48", "throughput": 5592.2, "total_tokens": 16699536} +{"current_steps": 33930, "total_steps": 37885, "loss": 0.0009, "lr": 6.569943396754396e-08, "epoch": 4.478025603800977, "percentage": 89.56, "elapsed_time": "0:49:46", "remaining_time": "0:05:48", "throughput": 5592.38, "total_tokens": 16701904} +{"current_steps": 33935, "total_steps": 37885, "loss": 0.0, "lr": 6.553530057188206e-08, "epoch": 4.478685495578725, "percentage": 89.57, "elapsed_time": "0:49:46", "remaining_time": "0:05:47", "throughput": 5592.56, "total_tokens": 16704272} +{"current_steps": 33940, "total_steps": 37885, "loss": 0.0, "lr": 6.537136550958545e-08, "epoch": 4.4793453873564735, "percentage": 89.59, "elapsed_time": "0:49:47", "remaining_time": "0:05:47", "throughput": 5592.82, "total_tokens": 16706896} +{"current_steps": 33945, "total_steps": 37885, "loss": 0.0016, "lr": 6.52076288154485e-08, "epoch": 4.480005279134222, "percentage": 89.6, "elapsed_time": "0:49:47", "remaining_time": "0:05:46", "throughput": 5592.92, "total_tokens": 16709008} +{"current_steps": 33950, "total_steps": 37885, "loss": 0.007, "lr": 6.504409052422332e-08, "epoch": 4.48066517091197, "percentage": 89.61, "elapsed_time": "0:49:47", "remaining_time": "0:05:46", "throughput": 5593.12, "total_tokens": 16711440} +{"current_steps": 33955, "total_steps": 37885, "loss": 0.0969, "lr": 6.488075067061927e-08, "epoch": 4.481325062689719, "percentage": 89.63, "elapsed_time": "0:49:48", "remaining_time": "0:05:45", "throughput": 5593.4, "total_tokens": 16714128} +{"current_steps": 33960, "total_steps": 37885, "loss": 0.0, "lr": 6.471760928930436e-08, "epoch": 4.4819849544674675, "percentage": 89.64, "elapsed_time": "0:49:48", "remaining_time": "0:05:45", "throughput": 5593.59, "total_tokens": 16716560} +{"current_steps": 33965, "total_steps": 37885, "loss": 0.0, "lr": 6.455466641490403e-08, "epoch": 4.482644846245216, "percentage": 89.65, "elapsed_time": "0:49:48", "remaining_time": "0:05:44", "throughput": 5593.83, "total_tokens": 16719120} +{"current_steps": 33970, "total_steps": 37885, "loss": 0.0, "lr": 6.439192208200195e-08, "epoch": 4.483304738022964, "percentage": 89.67, "elapsed_time": "0:49:49", "remaining_time": "0:05:44", "throughput": 5594.03, "total_tokens": 16721552} +{"current_steps": 33975, "total_steps": 37885, "loss": 0.0, "lr": 6.422937632513914e-08, "epoch": 4.483964629800712, "percentage": 89.68, "elapsed_time": "0:49:49", "remaining_time": "0:05:44", "throughput": 5594.33, "total_tokens": 16724304} +{"current_steps": 33980, "total_steps": 37885, "loss": 0.0, "lr": 6.40670291788149e-08, "epoch": 4.4846245215784615, "percentage": 89.69, "elapsed_time": "0:49:49", "remaining_time": "0:05:43", "throughput": 5594.6, "total_tokens": 16726992} +{"current_steps": 33985, "total_steps": 37885, "loss": 0.0, "lr": 6.390488067748634e-08, "epoch": 4.48528441335621, "percentage": 89.71, "elapsed_time": "0:49:50", "remaining_time": "0:05:43", "throughput": 5594.82, "total_tokens": 16729488} +{"current_steps": 33990, "total_steps": 37885, "loss": 0.0, "lr": 6.374293085556814e-08, "epoch": 4.485944305133958, "percentage": 89.72, "elapsed_time": "0:49:50", "remaining_time": "0:05:42", "throughput": 5595.02, "total_tokens": 16731920} +{"current_steps": 33995, "total_steps": 37885, "loss": 0.0, "lr": 6.358117974743293e-08, "epoch": 4.486604196911706, "percentage": 89.73, "elapsed_time": "0:49:50", "remaining_time": "0:05:42", "throughput": 5595.24, "total_tokens": 16734416} +{"current_steps": 34000, "total_steps": 37885, "loss": 0.0, "lr": 6.341962738741125e-08, "epoch": 4.487264088689455, "percentage": 89.75, "elapsed_time": "0:49:51", "remaining_time": "0:05:41", "throughput": 5595.51, "total_tokens": 16737104} +{"current_steps": 34005, "total_steps": 37885, "loss": 0.0, "lr": 6.325827380979176e-08, "epoch": 4.487923980467204, "percentage": 89.76, "elapsed_time": "0:49:51", "remaining_time": "0:05:41", "throughput": 5595.71, "total_tokens": 16739536} +{"current_steps": 34010, "total_steps": 37885, "loss": 0.0, "lr": 6.309711904882009e-08, "epoch": 4.488583872244952, "percentage": 89.77, "elapsed_time": "0:49:51", "remaining_time": "0:05:40", "throughput": 5595.83, "total_tokens": 16741712} +{"current_steps": 34015, "total_steps": 37885, "loss": 0.0, "lr": 6.293616313870032e-08, "epoch": 4.4892437640227, "percentage": 89.78, "elapsed_time": "0:49:52", "remaining_time": "0:05:40", "throughput": 5595.92, "total_tokens": 16743824} +{"current_steps": 34020, "total_steps": 37885, "loss": 0.0, "lr": 6.277540611359445e-08, "epoch": 4.489903655800449, "percentage": 89.8, "elapsed_time": "0:49:52", "remaining_time": "0:05:39", "throughput": 5596.12, "total_tokens": 16746256} +{"current_steps": 34025, "total_steps": 37885, "loss": 0.0, "lr": 6.261484800762163e-08, "epoch": 4.490563547578197, "percentage": 89.81, "elapsed_time": "0:49:52", "remaining_time": "0:05:39", "throughput": 5596.3, "total_tokens": 16748624} +{"current_steps": 34030, "total_steps": 37885, "loss": 0.0001, "lr": 6.245448885485938e-08, "epoch": 4.491223439355946, "percentage": 89.82, "elapsed_time": "0:49:53", "remaining_time": "0:05:39", "throughput": 5596.56, "total_tokens": 16751248} +{"current_steps": 34035, "total_steps": 37885, "loss": 0.0, "lr": 6.229432868934281e-08, "epoch": 4.491883331133694, "percentage": 89.84, "elapsed_time": "0:49:53", "remaining_time": "0:05:38", "throughput": 5596.75, "total_tokens": 16753680} +{"current_steps": 34040, "total_steps": 37885, "loss": 0.0001, "lr": 6.21343675450644e-08, "epoch": 4.492543222911443, "percentage": 89.85, "elapsed_time": "0:49:53", "remaining_time": "0:05:38", "throughput": 5596.99, "total_tokens": 16756240} +{"current_steps": 34045, "total_steps": 37885, "loss": 0.0001, "lr": 6.19746054559751e-08, "epoch": 4.493203114689191, "percentage": 89.86, "elapsed_time": "0:49:54", "remaining_time": "0:05:37", "throughput": 5597.19, "total_tokens": 16758672} +{"current_steps": 34050, "total_steps": 37885, "loss": 0.0, "lr": 6.181504245598312e-08, "epoch": 4.493863006466939, "percentage": 89.88, "elapsed_time": "0:49:54", "remaining_time": "0:05:37", "throughput": 5597.31, "total_tokens": 16760848} +{"current_steps": 34055, "total_steps": 37885, "loss": 0.0, "lr": 6.165567857895471e-08, "epoch": 4.494522898244687, "percentage": 89.89, "elapsed_time": "0:49:54", "remaining_time": "0:05:36", "throughput": 5597.53, "total_tokens": 16763344} +{"current_steps": 34060, "total_steps": 37885, "loss": 0.0005, "lr": 6.149651385871358e-08, "epoch": 4.495182790022437, "percentage": 89.9, "elapsed_time": "0:49:55", "remaining_time": "0:05:36", "throughput": 5597.76, "total_tokens": 16765904} +{"current_steps": 34065, "total_steps": 37885, "loss": 0.0, "lr": 6.133754832904092e-08, "epoch": 4.495842681800185, "percentage": 89.92, "elapsed_time": "0:49:55", "remaining_time": "0:05:35", "throughput": 5597.97, "total_tokens": 16768336} +{"current_steps": 34070, "total_steps": 37885, "loss": 0.0213, "lr": 6.117878202367677e-08, "epoch": 4.496502573577933, "percentage": 89.93, "elapsed_time": "0:49:55", "remaining_time": "0:05:35", "throughput": 5598.18, "total_tokens": 16770832} +{"current_steps": 34075, "total_steps": 37885, "loss": 0.0001, "lr": 6.102021497631749e-08, "epoch": 4.497162465355681, "percentage": 89.94, "elapsed_time": "0:49:56", "remaining_time": "0:05:34", "throughput": 5598.38, "total_tokens": 16773264} +{"current_steps": 34080, "total_steps": 37885, "loss": 0.0103, "lr": 6.086184722061826e-08, "epoch": 4.49782235713343, "percentage": 89.96, "elapsed_time": "0:49:56", "remaining_time": "0:05:34", "throughput": 5598.62, "total_tokens": 16775824} +{"current_steps": 34085, "total_steps": 37885, "loss": 0.0014, "lr": 6.070367879019101e-08, "epoch": 4.498482248911179, "percentage": 89.97, "elapsed_time": "0:49:56", "remaining_time": "0:05:34", "throughput": 5598.75, "total_tokens": 16778064} +{"current_steps": 34090, "total_steps": 37885, "loss": 0.0007, "lr": 6.054570971860618e-08, "epoch": 4.499142140688927, "percentage": 89.98, "elapsed_time": "0:49:57", "remaining_time": "0:05:33", "throughput": 5598.99, "total_tokens": 16780624} +{"current_steps": 34095, "total_steps": 37885, "loss": 0.0242, "lr": 6.038794003939151e-08, "epoch": 4.499802032466675, "percentage": 90.0, "elapsed_time": "0:49:57", "remaining_time": "0:05:33", "throughput": 5599.25, "total_tokens": 16783248} +{"current_steps": 34100, "total_steps": 37885, "loss": 0.0016, "lr": 6.023036978603213e-08, "epoch": 4.500461924244424, "percentage": 90.01, "elapsed_time": "0:49:57", "remaining_time": "0:05:32", "throughput": 5599.4, "total_tokens": 16785552} +{"current_steps": 34105, "total_steps": 37885, "loss": 0.0, "lr": 6.007299899197194e-08, "epoch": 4.501121816022172, "percentage": 90.02, "elapsed_time": "0:49:58", "remaining_time": "0:05:32", "throughput": 5599.52, "total_tokens": 16787728} +{"current_steps": 34110, "total_steps": 37885, "loss": 0.0, "lr": 5.991582769061121e-08, "epoch": 4.501781707799921, "percentage": 90.04, "elapsed_time": "0:49:58", "remaining_time": "0:05:31", "throughput": 5599.75, "total_tokens": 16790288} +{"current_steps": 34110, "total_steps": 37885, "eval_loss": 0.2763582170009613, "epoch": 4.501781707799921, "percentage": 90.04, "elapsed_time": "0:50:06", "remaining_time": "0:05:32", "throughput": 5585.2, "total_tokens": 16790288} +{"current_steps": 34115, "total_steps": 37885, "loss": 0.0, "lr": 5.975885591530827e-08, "epoch": 4.502441599577669, "percentage": 90.05, "elapsed_time": "0:50:42", "remaining_time": "0:05:36", "throughput": 5519.2, "total_tokens": 16792848} +{"current_steps": 34120, "total_steps": 37885, "loss": 0.0518, "lr": 5.9602083699379577e-08, "epoch": 4.503101491355418, "percentage": 90.06, "elapsed_time": "0:50:42", "remaining_time": "0:05:35", "throughput": 5519.44, "total_tokens": 16795408} +{"current_steps": 34125, "total_steps": 37885, "loss": 0.028, "lr": 5.9445511076098745e-08, "epoch": 4.503761383133166, "percentage": 90.08, "elapsed_time": "0:50:43", "remaining_time": "0:05:35", "throughput": 5519.72, "total_tokens": 16798096} +{"current_steps": 34130, "total_steps": 37885, "loss": 0.0, "lr": 5.92891380786974e-08, "epoch": 4.504421274910914, "percentage": 90.09, "elapsed_time": "0:50:43", "remaining_time": "0:05:34", "throughput": 5519.92, "total_tokens": 16800528} +{"current_steps": 34135, "total_steps": 37885, "loss": 0.0, "lr": 5.913296474036422e-08, "epoch": 4.505081166688663, "percentage": 90.1, "elapsed_time": "0:50:43", "remaining_time": "0:05:34", "throughput": 5520.14, "total_tokens": 16803024} +{"current_steps": 34140, "total_steps": 37885, "loss": 0.0, "lr": 5.8976991094246034e-08, "epoch": 4.505741058466412, "percentage": 90.11, "elapsed_time": "0:50:44", "remaining_time": "0:05:33", "throughput": 5520.34, "total_tokens": 16805456} +{"current_steps": 34145, "total_steps": 37885, "loss": 0.0005, "lr": 5.882121717344735e-08, "epoch": 4.50640095024416, "percentage": 90.13, "elapsed_time": "0:50:44", "remaining_time": "0:05:33", "throughput": 5520.46, "total_tokens": 16807632} +{"current_steps": 34150, "total_steps": 37885, "loss": 0.0, "lr": 5.866564301102972e-08, "epoch": 4.507060842021908, "percentage": 90.14, "elapsed_time": "0:50:44", "remaining_time": "0:05:33", "throughput": 5520.71, "total_tokens": 16810256} +{"current_steps": 34155, "total_steps": 37885, "loss": 0.0047, "lr": 5.851026864001263e-08, "epoch": 4.5077207337996565, "percentage": 90.15, "elapsed_time": "0:50:45", "remaining_time": "0:05:32", "throughput": 5521.01, "total_tokens": 16813008} +{"current_steps": 34160, "total_steps": 37885, "loss": 0.0294, "lr": 5.835509409337358e-08, "epoch": 4.508380625577406, "percentage": 90.17, "elapsed_time": "0:50:45", "remaining_time": "0:05:32", "throughput": 5521.18, "total_tokens": 16815376} +{"current_steps": 34165, "total_steps": 37885, "loss": 0.0, "lr": 5.820011940404668e-08, "epoch": 4.509040517355154, "percentage": 90.18, "elapsed_time": "0:50:45", "remaining_time": "0:05:31", "throughput": 5521.34, "total_tokens": 16817680} +{"current_steps": 34170, "total_steps": 37885, "loss": 0.0, "lr": 5.804534460492449e-08, "epoch": 4.509700409132902, "percentage": 90.19, "elapsed_time": "0:50:46", "remaining_time": "0:05:31", "throughput": 5521.62, "total_tokens": 16820368} +{"current_steps": 34175, "total_steps": 37885, "loss": 0.0, "lr": 5.789076972885687e-08, "epoch": 4.5103603009106505, "percentage": 90.21, "elapsed_time": "0:50:46", "remaining_time": "0:05:30", "throughput": 5521.78, "total_tokens": 16822672} +{"current_steps": 34180, "total_steps": 37885, "loss": 0.0, "lr": 5.7736394808651226e-08, "epoch": 4.511020192688399, "percentage": 90.22, "elapsed_time": "0:50:46", "remaining_time": "0:05:30", "throughput": 5521.94, "total_tokens": 16824976} +{"current_steps": 34185, "total_steps": 37885, "loss": 0.0, "lr": 5.758221987707235e-08, "epoch": 4.511680084466148, "percentage": 90.23, "elapsed_time": "0:50:47", "remaining_time": "0:05:29", "throughput": 5522.16, "total_tokens": 16827472} +{"current_steps": 34190, "total_steps": 37885, "loss": 0.0, "lr": 5.742824496684284e-08, "epoch": 4.512339976243896, "percentage": 90.25, "elapsed_time": "0:50:47", "remaining_time": "0:05:29", "throughput": 5522.34, "total_tokens": 16829840} +{"current_steps": 34195, "total_steps": 37885, "loss": 0.0, "lr": 5.72744701106429e-08, "epoch": 4.5129998680216445, "percentage": 90.26, "elapsed_time": "0:50:47", "remaining_time": "0:05:28", "throughput": 5522.58, "total_tokens": 16832400} +{"current_steps": 34200, "total_steps": 37885, "loss": 0.0016, "lr": 5.7120895341109864e-08, "epoch": 4.513659759799393, "percentage": 90.27, "elapsed_time": "0:50:48", "remaining_time": "0:05:28", "throughput": 5522.78, "total_tokens": 16834832} +{"current_steps": 34205, "total_steps": 37885, "loss": 0.0066, "lr": 5.696752069083899e-08, "epoch": 4.514319651577141, "percentage": 90.29, "elapsed_time": "0:50:48", "remaining_time": "0:05:27", "throughput": 5522.96, "total_tokens": 16837200} +{"current_steps": 34210, "total_steps": 37885, "loss": 0.0, "lr": 5.6814346192383125e-08, "epoch": 4.51497954335489, "percentage": 90.3, "elapsed_time": "0:50:48", "remaining_time": "0:05:27", "throughput": 5523.15, "total_tokens": 16839632} +{"current_steps": 34215, "total_steps": 37885, "loss": 0.0, "lr": 5.666137187825204e-08, "epoch": 4.5156394351326385, "percentage": 90.31, "elapsed_time": "0:50:49", "remaining_time": "0:05:27", "throughput": 5523.37, "total_tokens": 16842128} +{"current_steps": 34220, "total_steps": 37885, "loss": 0.0002, "lr": 5.650859778091388e-08, "epoch": 4.516299326910387, "percentage": 90.33, "elapsed_time": "0:50:49", "remaining_time": "0:05:26", "throughput": 5523.47, "total_tokens": 16844240} +{"current_steps": 34225, "total_steps": 37885, "loss": 0.0, "lr": 5.635602393279326e-08, "epoch": 4.516959218688135, "percentage": 90.34, "elapsed_time": "0:50:49", "remaining_time": "0:05:26", "throughput": 5523.57, "total_tokens": 16846352} +{"current_steps": 34230, "total_steps": 37885, "loss": 0.0, "lr": 5.62036503662735e-08, "epoch": 4.517619110465883, "percentage": 90.35, "elapsed_time": "0:50:50", "remaining_time": "0:05:25", "throughput": 5523.78, "total_tokens": 16848784} +{"current_steps": 34235, "total_steps": 37885, "loss": 0.0, "lr": 5.6051477113694625e-08, "epoch": 4.518279002243632, "percentage": 90.37, "elapsed_time": "0:50:50", "remaining_time": "0:05:25", "throughput": 5523.9, "total_tokens": 16850960} +{"current_steps": 34240, "total_steps": 37885, "loss": 0.0, "lr": 5.589950420735379e-08, "epoch": 4.518938894021381, "percentage": 90.38, "elapsed_time": "0:50:50", "remaining_time": "0:05:24", "throughput": 5524.27, "total_tokens": 16853968} +{"current_steps": 34245, "total_steps": 37885, "loss": 0.045, "lr": 5.574773167950697e-08, "epoch": 4.519598785799129, "percentage": 90.39, "elapsed_time": "0:50:51", "remaining_time": "0:05:24", "throughput": 5524.52, "total_tokens": 16856592} +{"current_steps": 34250, "total_steps": 37885, "loss": 0.0, "lr": 5.5596159562366076e-08, "epoch": 4.520258677576877, "percentage": 90.41, "elapsed_time": "0:50:51", "remaining_time": "0:05:23", "throughput": 5524.72, "total_tokens": 16859024} +{"current_steps": 34255, "total_steps": 37885, "loss": 0.0518, "lr": 5.5444787888101696e-08, "epoch": 4.520918569354626, "percentage": 90.42, "elapsed_time": "0:50:51", "remaining_time": "0:05:23", "throughput": 5524.86, "total_tokens": 16861264} +{"current_steps": 34260, "total_steps": 37885, "loss": 0.0002, "lr": 5.529361668884103e-08, "epoch": 4.521578461132374, "percentage": 90.43, "elapsed_time": "0:50:52", "remaining_time": "0:05:22", "throughput": 5525.06, "total_tokens": 16863696} +{"current_steps": 34265, "total_steps": 37885, "loss": 0.0, "lr": 5.514264599666918e-08, "epoch": 4.522238352910123, "percentage": 90.44, "elapsed_time": "0:50:52", "remaining_time": "0:05:22", "throughput": 5525.24, "total_tokens": 16866064} +{"current_steps": 34270, "total_steps": 37885, "loss": 0.0, "lr": 5.4991875843628745e-08, "epoch": 4.522898244687871, "percentage": 90.46, "elapsed_time": "0:50:52", "remaining_time": "0:05:22", "throughput": 5525.5, "total_tokens": 16868688} +{"current_steps": 34275, "total_steps": 37885, "loss": 0.0005, "lr": 5.484130626171923e-08, "epoch": 4.52355813646562, "percentage": 90.47, "elapsed_time": "0:50:53", "remaining_time": "0:05:21", "throughput": 5525.6, "total_tokens": 16870800} +{"current_steps": 34280, "total_steps": 37885, "loss": 0.0, "lr": 5.46909372828982e-08, "epoch": 4.524218028243368, "percentage": 90.48, "elapsed_time": "0:50:53", "remaining_time": "0:05:21", "throughput": 5525.89, "total_tokens": 16873552} +{"current_steps": 34285, "total_steps": 37885, "loss": 0.0008, "lr": 5.454076893908055e-08, "epoch": 4.524877920021116, "percentage": 90.5, "elapsed_time": "0:50:53", "remaining_time": "0:05:20", "throughput": 5526.09, "total_tokens": 16875984} +{"current_steps": 34290, "total_steps": 37885, "loss": 0.0003, "lr": 5.439080126213802e-08, "epoch": 4.5255378117988645, "percentage": 90.51, "elapsed_time": "0:50:54", "remaining_time": "0:05:20", "throughput": 5526.33, "total_tokens": 16878544} +{"current_steps": 34295, "total_steps": 37885, "loss": 0.0, "lr": 5.4241034283900364e-08, "epoch": 4.526197703576614, "percentage": 90.52, "elapsed_time": "0:50:54", "remaining_time": "0:05:19", "throughput": 5526.59, "total_tokens": 16881168} +{"current_steps": 34300, "total_steps": 37885, "loss": 0.0, "lr": 5.40914680361545e-08, "epoch": 4.526857595354362, "percentage": 90.54, "elapsed_time": "0:50:54", "remaining_time": "0:05:19", "throughput": 5526.75, "total_tokens": 16883472} +{"current_steps": 34305, "total_steps": 37885, "loss": 0.0, "lr": 5.394210255064502e-08, "epoch": 4.52751748713211, "percentage": 90.55, "elapsed_time": "0:50:55", "remaining_time": "0:05:18", "throughput": 5526.85, "total_tokens": 16885648} +{"current_steps": 34310, "total_steps": 37885, "loss": 0.0, "lr": 5.379293785907335e-08, "epoch": 4.5281773789098585, "percentage": 90.56, "elapsed_time": "0:50:55", "remaining_time": "0:05:18", "throughput": 5526.99, "total_tokens": 16887888} +{"current_steps": 34315, "total_steps": 37885, "loss": 0.0005, "lr": 5.364397399309861e-08, "epoch": 4.528837270687607, "percentage": 90.58, "elapsed_time": "0:50:55", "remaining_time": "0:05:17", "throughput": 5527.12, "total_tokens": 16890128} +{"current_steps": 34320, "total_steps": 37885, "loss": 0.0, "lr": 5.349521098433762e-08, "epoch": 4.529497162465356, "percentage": 90.59, "elapsed_time": "0:50:56", "remaining_time": "0:05:17", "throughput": 5527.3, "total_tokens": 16892496} +{"current_steps": 34325, "total_steps": 37885, "loss": 0.0415, "lr": 5.334664886436391e-08, "epoch": 4.530157054243104, "percentage": 90.6, "elapsed_time": "0:50:56", "remaining_time": "0:05:17", "throughput": 5527.41, "total_tokens": 16894608} +{"current_steps": 34330, "total_steps": 37885, "loss": 0.0, "lr": 5.3198287664708907e-08, "epoch": 4.5308169460208525, "percentage": 90.62, "elapsed_time": "0:50:56", "remaining_time": "0:05:16", "throughput": 5527.78, "total_tokens": 16897616} +{"current_steps": 34335, "total_steps": 37885, "loss": 0.0, "lr": 5.3050127416861104e-08, "epoch": 4.531476837798601, "percentage": 90.63, "elapsed_time": "0:50:57", "remaining_time": "0:05:16", "throughput": 5527.98, "total_tokens": 16900048} +{"current_steps": 34340, "total_steps": 37885, "loss": 0.0165, "lr": 5.290216815226656e-08, "epoch": 4.532136729576349, "percentage": 90.64, "elapsed_time": "0:50:57", "remaining_time": "0:05:15", "throughput": 5528.16, "total_tokens": 16902416} +{"current_steps": 34345, "total_steps": 37885, "loss": 0.0003, "lr": 5.275440990232838e-08, "epoch": 4.532796621354098, "percentage": 90.66, "elapsed_time": "0:50:57", "remaining_time": "0:05:15", "throughput": 5528.31, "total_tokens": 16904656} +{"current_steps": 34350, "total_steps": 37885, "loss": 0.0, "lr": 5.2606852698407367e-08, "epoch": 4.5334565131318465, "percentage": 90.67, "elapsed_time": "0:50:58", "remaining_time": "0:05:14", "throughput": 5528.55, "total_tokens": 16907216} +{"current_steps": 34355, "total_steps": 37885, "loss": 0.0381, "lr": 5.245949657182136e-08, "epoch": 4.534116404909595, "percentage": 90.68, "elapsed_time": "0:50:58", "remaining_time": "0:05:14", "throughput": 5528.81, "total_tokens": 16909840} +{"current_steps": 34360, "total_steps": 37885, "loss": 0.0003, "lr": 5.231234155384567e-08, "epoch": 4.534776296687343, "percentage": 90.7, "elapsed_time": "0:50:58", "remaining_time": "0:05:13", "throughput": 5529.07, "total_tokens": 16912464} +{"current_steps": 34365, "total_steps": 37885, "loss": 0.0, "lr": 5.216538767571277e-08, "epoch": 4.535436188465091, "percentage": 90.71, "elapsed_time": "0:50:59", "remaining_time": "0:05:13", "throughput": 5529.33, "total_tokens": 16915088} +{"current_steps": 34370, "total_steps": 37885, "loss": 0.002, "lr": 5.201863496861292e-08, "epoch": 4.5360960802428405, "percentage": 90.72, "elapsed_time": "0:50:59", "remaining_time": "0:05:12", "throughput": 5529.55, "total_tokens": 16917584} +{"current_steps": 34375, "total_steps": 37885, "loss": 0.0, "lr": 5.187208346369276e-08, "epoch": 4.536755972020589, "percentage": 90.74, "elapsed_time": "0:50:59", "remaining_time": "0:05:12", "throughput": 5529.77, "total_tokens": 16920080} +{"current_steps": 34380, "total_steps": 37885, "loss": 0.0, "lr": 5.17257331920572e-08, "epoch": 4.537415863798337, "percentage": 90.75, "elapsed_time": "0:51:00", "remaining_time": "0:05:11", "throughput": 5530.01, "total_tokens": 16922640} +{"current_steps": 34385, "total_steps": 37885, "loss": 0.0001, "lr": 5.157958418476793e-08, "epoch": 4.538075755576085, "percentage": 90.76, "elapsed_time": "0:51:00", "remaining_time": "0:05:11", "throughput": 5530.25, "total_tokens": 16925200} +{"current_steps": 34390, "total_steps": 37885, "loss": 0.0123, "lr": 5.1433636472844045e-08, "epoch": 4.538735647353834, "percentage": 90.77, "elapsed_time": "0:51:00", "remaining_time": "0:05:11", "throughput": 5530.4, "total_tokens": 16927504} +{"current_steps": 34395, "total_steps": 37885, "loss": 0.0079, "lr": 5.1287890087261864e-08, "epoch": 4.539395539131583, "percentage": 90.79, "elapsed_time": "0:51:01", "remaining_time": "0:05:10", "throughput": 5530.58, "total_tokens": 16929872} +{"current_steps": 34400, "total_steps": 37885, "loss": 0.0001, "lr": 5.114234505895465e-08, "epoch": 4.540055430909331, "percentage": 90.8, "elapsed_time": "0:51:01", "remaining_time": "0:05:10", "throughput": 5530.63, "total_tokens": 16931856} +{"current_steps": 34405, "total_steps": 37885, "loss": 0.0, "lr": 5.0997001418814025e-08, "epoch": 4.540715322687079, "percentage": 90.81, "elapsed_time": "0:51:01", "remaining_time": "0:05:09", "throughput": 5530.81, "total_tokens": 16934224} +{"current_steps": 34410, "total_steps": 37885, "loss": 0.0, "lr": 5.085185919768742e-08, "epoch": 4.541375214464828, "percentage": 90.83, "elapsed_time": "0:51:02", "remaining_time": "0:05:09", "throughput": 5530.98, "total_tokens": 16936592} +{"current_steps": 34415, "total_steps": 37885, "loss": 0.0, "lr": 5.0706918426380754e-08, "epoch": 4.542035106242576, "percentage": 90.84, "elapsed_time": "0:51:02", "remaining_time": "0:05:08", "throughput": 5531.18, "total_tokens": 16939024} +{"current_steps": 34420, "total_steps": 37885, "loss": 0.0366, "lr": 5.056217913565619e-08, "epoch": 4.542694998020325, "percentage": 90.85, "elapsed_time": "0:51:02", "remaining_time": "0:05:08", "throughput": 5531.38, "total_tokens": 16941456} +{"current_steps": 34425, "total_steps": 37885, "loss": 0.0, "lr": 5.0417641356233943e-08, "epoch": 4.543354889798073, "percentage": 90.87, "elapsed_time": "0:51:03", "remaining_time": "0:05:07", "throughput": 5531.49, "total_tokens": 16943632} +{"current_steps": 34430, "total_steps": 37885, "loss": 0.0, "lr": 5.027330511879102e-08, "epoch": 4.544014781575822, "percentage": 90.88, "elapsed_time": "0:51:03", "remaining_time": "0:05:07", "throughput": 5531.67, "total_tokens": 16946000} +{"current_steps": 34435, "total_steps": 37885, "loss": 0.0, "lr": 5.012917045396148e-08, "epoch": 4.54467467335357, "percentage": 90.89, "elapsed_time": "0:51:03", "remaining_time": "0:05:06", "throughput": 5531.9, "total_tokens": 16948560} +{"current_steps": 34440, "total_steps": 37885, "loss": 0.0001, "lr": 4.998523739233729e-08, "epoch": 4.545334565131318, "percentage": 90.91, "elapsed_time": "0:51:04", "remaining_time": "0:05:06", "throughput": 5532.07, "total_tokens": 16950928} +{"current_steps": 34445, "total_steps": 37885, "loss": 0.0005, "lr": 4.984150596446701e-08, "epoch": 4.545994456909067, "percentage": 90.92, "elapsed_time": "0:51:04", "remaining_time": "0:05:06", "throughput": 5532.26, "total_tokens": 16953360} +{"current_steps": 34450, "total_steps": 37885, "loss": 0.0, "lr": 4.9697976200856584e-08, "epoch": 4.546654348686816, "percentage": 90.93, "elapsed_time": "0:51:04", "remaining_time": "0:05:05", "throughput": 5532.47, "total_tokens": 16955856} +{"current_steps": 34455, "total_steps": 37885, "loss": 0.0, "lr": 4.955464813196897e-08, "epoch": 4.547314240464564, "percentage": 90.95, "elapsed_time": "0:51:05", "remaining_time": "0:05:05", "throughput": 5532.62, "total_tokens": 16958160} +{"current_steps": 34460, "total_steps": 37885, "loss": 0.0001, "lr": 4.941152178822483e-08, "epoch": 4.547974132242312, "percentage": 90.96, "elapsed_time": "0:51:05", "remaining_time": "0:05:04", "throughput": 5532.81, "total_tokens": 16960592} +{"current_steps": 34465, "total_steps": 37885, "loss": 0.0, "lr": 4.926859720000165e-08, "epoch": 4.5486340240200605, "percentage": 90.97, "elapsed_time": "0:51:05", "remaining_time": "0:05:04", "throughput": 5533.04, "total_tokens": 16963152} +{"current_steps": 34470, "total_steps": 37885, "loss": 0.0, "lr": 4.912587439763394e-08, "epoch": 4.54929391579781, "percentage": 90.99, "elapsed_time": "0:51:06", "remaining_time": "0:05:03", "throughput": 5533.22, "total_tokens": 16965584} +{"current_steps": 34475, "total_steps": 37885, "loss": 0.0, "lr": 4.898335341141369e-08, "epoch": 4.549953807575558, "percentage": 91.0, "elapsed_time": "0:51:06", "remaining_time": "0:05:03", "throughput": 5533.37, "total_tokens": 16967888} +{"current_steps": 34480, "total_steps": 37885, "loss": 0.0, "lr": 4.884103427159014e-08, "epoch": 4.550613699353306, "percentage": 91.01, "elapsed_time": "0:51:06", "remaining_time": "0:05:02", "throughput": 5533.54, "total_tokens": 16970256} +{"current_steps": 34485, "total_steps": 37885, "loss": 0.0615, "lr": 4.8698917008369144e-08, "epoch": 4.5512735911310545, "percentage": 91.03, "elapsed_time": "0:51:07", "remaining_time": "0:05:02", "throughput": 5533.88, "total_tokens": 16973200} +{"current_steps": 34490, "total_steps": 37885, "loss": 0.0, "lr": 4.855700165191423e-08, "epoch": 4.551933482908803, "percentage": 91.04, "elapsed_time": "0:51:07", "remaining_time": "0:05:01", "throughput": 5534.05, "total_tokens": 16975568} +{"current_steps": 34495, "total_steps": 37885, "loss": 0.0, "lr": 4.841528823234609e-08, "epoch": 4.552593374686552, "percentage": 91.05, "elapsed_time": "0:51:07", "remaining_time": "0:05:01", "throughput": 5534.28, "total_tokens": 16978128} +{"current_steps": 34500, "total_steps": 37885, "loss": 0.0, "lr": 4.8273776779741984e-08, "epoch": 4.5532532664643, "percentage": 91.07, "elapsed_time": "0:51:08", "remaining_time": "0:05:01", "throughput": 5534.48, "total_tokens": 16980560} +{"current_steps": 34505, "total_steps": 37885, "loss": 0.0, "lr": 4.8132467324136894e-08, "epoch": 4.5539131582420485, "percentage": 91.08, "elapsed_time": "0:51:08", "remaining_time": "0:05:00", "throughput": 5534.73, "total_tokens": 16983184} +{"current_steps": 34510, "total_steps": 37885, "loss": 0.0, "lr": 4.799135989552272e-08, "epoch": 4.554573050019797, "percentage": 91.09, "elapsed_time": "0:51:08", "remaining_time": "0:05:00", "throughput": 5534.87, "total_tokens": 16985488} +{"current_steps": 34515, "total_steps": 37885, "loss": 0.0019, "lr": 4.7850454523848725e-08, "epoch": 4.555232941797545, "percentage": 91.1, "elapsed_time": "0:51:09", "remaining_time": "0:04:59", "throughput": 5535.08, "total_tokens": 16987984} +{"current_steps": 34520, "total_steps": 37885, "loss": 0.0, "lr": 4.770975123902066e-08, "epoch": 4.555892833575293, "percentage": 91.12, "elapsed_time": "0:51:09", "remaining_time": "0:04:59", "throughput": 5535.23, "total_tokens": 16990288} +{"current_steps": 34525, "total_steps": 37885, "loss": 0.0, "lr": 4.756925007090185e-08, "epoch": 4.5565527253530425, "percentage": 91.13, "elapsed_time": "0:51:09", "remaining_time": "0:04:58", "throughput": 5535.48, "total_tokens": 16992912} +{"current_steps": 34530, "total_steps": 37885, "loss": 0.0294, "lr": 4.7428951049312996e-08, "epoch": 4.557212617130791, "percentage": 91.14, "elapsed_time": "0:51:10", "remaining_time": "0:04:58", "throughput": 5535.59, "total_tokens": 16995088} +{"current_steps": 34535, "total_steps": 37885, "loss": 0.0, "lr": 4.728885420403117e-08, "epoch": 4.557872508908539, "percentage": 91.16, "elapsed_time": "0:51:10", "remaining_time": "0:04:57", "throughput": 5535.79, "total_tokens": 16997520} +{"current_steps": 34540, "total_steps": 37885, "loss": 0.0, "lr": 4.714895956479104e-08, "epoch": 4.558532400686287, "percentage": 91.17, "elapsed_time": "0:51:10", "remaining_time": "0:04:57", "throughput": 5535.89, "total_tokens": 16999696} +{"current_steps": 34545, "total_steps": 37885, "loss": 0.0337, "lr": 4.700926716128428e-08, "epoch": 4.559192292464036, "percentage": 91.18, "elapsed_time": "0:51:11", "remaining_time": "0:04:56", "throughput": 5536.11, "total_tokens": 17002256} +{"current_steps": 34550, "total_steps": 37885, "loss": 0.0, "lr": 4.686977702315953e-08, "epoch": 4.559852184241785, "percentage": 91.2, "elapsed_time": "0:51:11", "remaining_time": "0:04:56", "throughput": 5536.35, "total_tokens": 17004880} +{"current_steps": 34555, "total_steps": 37885, "loss": 0.0719, "lr": 4.673048918002265e-08, "epoch": 4.560512076019533, "percentage": 91.21, "elapsed_time": "0:51:11", "remaining_time": "0:04:56", "throughput": 5536.56, "total_tokens": 17007376} +{"current_steps": 34560, "total_steps": 37885, "loss": 0.0, "lr": 4.659140366143621e-08, "epoch": 4.561171967797281, "percentage": 91.22, "elapsed_time": "0:51:12", "remaining_time": "0:04:55", "throughput": 5536.75, "total_tokens": 17009808} +{"current_steps": 34565, "total_steps": 37885, "loss": 0.0, "lr": 4.64525204969205e-08, "epoch": 4.56183185957503, "percentage": 91.24, "elapsed_time": "0:51:12", "remaining_time": "0:04:55", "throughput": 5537.09, "total_tokens": 17012752} +{"current_steps": 34570, "total_steps": 37885, "loss": 0.0, "lr": 4.631383971595226e-08, "epoch": 4.562491751352778, "percentage": 91.25, "elapsed_time": "0:51:12", "remaining_time": "0:04:54", "throughput": 5537.23, "total_tokens": 17015056} +{"current_steps": 34575, "total_steps": 37885, "loss": 0.0007, "lr": 4.617536134796529e-08, "epoch": 4.563151643130526, "percentage": 91.26, "elapsed_time": "0:51:13", "remaining_time": "0:04:54", "throughput": 5537.4, "total_tokens": 17017424} +{"current_steps": 34580, "total_steps": 37885, "loss": 0.0, "lr": 4.6037085422351077e-08, "epoch": 4.563811534908275, "percentage": 91.28, "elapsed_time": "0:51:13", "remaining_time": "0:04:53", "throughput": 5537.62, "total_tokens": 17019984} +{"current_steps": 34585, "total_steps": 37885, "loss": 0.0, "lr": 4.5899011968457244e-08, "epoch": 4.564471426686024, "percentage": 91.29, "elapsed_time": "0:51:13", "remaining_time": "0:04:53", "throughput": 5537.94, "total_tokens": 17022864} +{"current_steps": 34590, "total_steps": 37885, "loss": 0.0, "lr": 4.576114101558914e-08, "epoch": 4.565131318463772, "percentage": 91.3, "elapsed_time": "0:51:14", "remaining_time": "0:04:52", "throughput": 5538.1, "total_tokens": 17025168} +{"current_steps": 34595, "total_steps": 37885, "loss": 0.0, "lr": 4.562347259300881e-08, "epoch": 4.56579121024152, "percentage": 91.32, "elapsed_time": "0:51:14", "remaining_time": "0:04:52", "throughput": 5538.32, "total_tokens": 17027728} +{"current_steps": 34600, "total_steps": 37885, "loss": 0.0308, "lr": 4.54860067299353e-08, "epoch": 4.566451102019268, "percentage": 91.33, "elapsed_time": "0:51:14", "remaining_time": "0:04:51", "throughput": 5538.49, "total_tokens": 17030096} +{"current_steps": 34605, "total_steps": 37885, "loss": 0.0253, "lr": 4.534874345554496e-08, "epoch": 4.567110993797018, "percentage": 91.34, "elapsed_time": "0:51:15", "remaining_time": "0:04:51", "throughput": 5538.79, "total_tokens": 17032912} +{"current_steps": 34610, "total_steps": 37885, "loss": 0.0, "lr": 4.521168279897058e-08, "epoch": 4.567770885574766, "percentage": 91.36, "elapsed_time": "0:51:15", "remaining_time": "0:04:51", "throughput": 5539.05, "total_tokens": 17035600} +{"current_steps": 34615, "total_steps": 37885, "loss": 0.087, "lr": 4.507482478930258e-08, "epoch": 4.568430777352514, "percentage": 91.37, "elapsed_time": "0:51:15", "remaining_time": "0:04:50", "throughput": 5539.25, "total_tokens": 17038096} +{"current_steps": 34620, "total_steps": 37885, "loss": 0.0398, "lr": 4.493816945558815e-08, "epoch": 4.569090669130262, "percentage": 91.38, "elapsed_time": "0:51:16", "remaining_time": "0:04:50", "throughput": 5539.5, "total_tokens": 17040720} +{"current_steps": 34625, "total_steps": 37885, "loss": 0.0105, "lr": 4.480171682683098e-08, "epoch": 4.569750560908011, "percentage": 91.4, "elapsed_time": "0:51:16", "remaining_time": "0:04:49", "throughput": 5539.72, "total_tokens": 17043280} +{"current_steps": 34630, "total_steps": 37885, "loss": 0.0226, "lr": 4.466546693199247e-08, "epoch": 4.57041045268576, "percentage": 91.41, "elapsed_time": "0:51:16", "remaining_time": "0:04:49", "throughput": 5539.8, "total_tokens": 17045392} +{"current_steps": 34635, "total_steps": 37885, "loss": 0.0, "lr": 4.4529419799990695e-08, "epoch": 4.571070344463508, "percentage": 91.42, "elapsed_time": "0:51:17", "remaining_time": "0:04:48", "throughput": 5540.01, "total_tokens": 17047888} +{"current_steps": 34640, "total_steps": 37885, "loss": 0.0016, "lr": 4.439357545970068e-08, "epoch": 4.571730236241256, "percentage": 91.43, "elapsed_time": "0:51:17", "remaining_time": "0:04:48", "throughput": 5540.19, "total_tokens": 17050320} +{"current_steps": 34645, "total_steps": 37885, "loss": 0.0, "lr": 4.425793393995414e-08, "epoch": 4.572390128019005, "percentage": 91.45, "elapsed_time": "0:51:17", "remaining_time": "0:04:47", "throughput": 5540.44, "total_tokens": 17052944} +{"current_steps": 34650, "total_steps": 37885, "loss": 0.0, "lr": 4.412249526954015e-08, "epoch": 4.573050019796753, "percentage": 91.46, "elapsed_time": "0:51:18", "remaining_time": "0:04:47", "throughput": 5540.59, "total_tokens": 17055248} +{"current_steps": 34655, "total_steps": 37885, "loss": 0.0, "lr": 4.398725947720483e-08, "epoch": 4.573709911574502, "percentage": 91.47, "elapsed_time": "0:51:18", "remaining_time": "0:04:46", "throughput": 5540.83, "total_tokens": 17057872} +{"current_steps": 34660, "total_steps": 37885, "loss": 0.0, "lr": 4.385222659165067e-08, "epoch": 4.57436980335225, "percentage": 91.49, "elapsed_time": "0:51:18", "remaining_time": "0:04:46", "throughput": 5541.01, "total_tokens": 17060304} +{"current_steps": 34665, "total_steps": 37885, "loss": 0.0, "lr": 4.3717396641537395e-08, "epoch": 4.575029695129999, "percentage": 91.5, "elapsed_time": "0:51:19", "remaining_time": "0:04:46", "throughput": 5541.25, "total_tokens": 17062928} +{"current_steps": 34670, "total_steps": 37885, "loss": 0.0, "lr": 4.358276965548202e-08, "epoch": 4.575689586907747, "percentage": 91.51, "elapsed_time": "0:51:19", "remaining_time": "0:04:45", "throughput": 5541.44, "total_tokens": 17065360} +{"current_steps": 34675, "total_steps": 37885, "loss": 0.0001, "lr": 4.344834566205802e-08, "epoch": 4.576349478685495, "percentage": 91.53, "elapsed_time": "0:51:19", "remaining_time": "0:04:45", "throughput": 5541.62, "total_tokens": 17067792} +{"current_steps": 34680, "total_steps": 37885, "loss": 0.0, "lr": 4.331412468979567e-08, "epoch": 4.577009370463244, "percentage": 91.54, "elapsed_time": "0:51:20", "remaining_time": "0:04:44", "throughput": 5541.67, "total_tokens": 17069776} +{"current_steps": 34685, "total_steps": 37885, "loss": 0.0, "lr": 4.318010676718254e-08, "epoch": 4.577669262240993, "percentage": 91.55, "elapsed_time": "0:51:20", "remaining_time": "0:04:44", "throughput": 5541.87, "total_tokens": 17072272} +{"current_steps": 34690, "total_steps": 37885, "loss": 0.0, "lr": 4.304629192266318e-08, "epoch": 4.578329154018741, "percentage": 91.57, "elapsed_time": "0:51:20", "remaining_time": "0:04:43", "throughput": 5542.1, "total_tokens": 17074832} +{"current_steps": 34695, "total_steps": 37885, "loss": 0.0004, "lr": 4.2912680184638564e-08, "epoch": 4.578989045796489, "percentage": 91.58, "elapsed_time": "0:51:21", "remaining_time": "0:04:43", "throughput": 5542.44, "total_tokens": 17077776} +{"current_steps": 34700, "total_steps": 37885, "loss": 0.0657, "lr": 4.277927158146688e-08, "epoch": 4.5796489375742375, "percentage": 91.59, "elapsed_time": "0:51:21", "remaining_time": "0:04:42", "throughput": 5542.66, "total_tokens": 17080336} +{"current_steps": 34705, "total_steps": 37885, "loss": 0.0066, "lr": 4.264606614146327e-08, "epoch": 4.580308829351987, "percentage": 91.61, "elapsed_time": "0:51:21", "remaining_time": "0:04:42", "throughput": 5542.77, "total_tokens": 17082576} +{"current_steps": 34710, "total_steps": 37885, "loss": 0.0, "lr": 4.251306389289944e-08, "epoch": 4.580968721129735, "percentage": 91.62, "elapsed_time": "0:51:22", "remaining_time": "0:04:41", "throughput": 5542.93, "total_tokens": 17084880} +{"current_steps": 34715, "total_steps": 37885, "loss": 0.0, "lr": 4.2380264864004143e-08, "epoch": 4.581628612907483, "percentage": 91.63, "elapsed_time": "0:51:22", "remaining_time": "0:04:41", "throughput": 5543.15, "total_tokens": 17087440} +{"current_steps": 34720, "total_steps": 37885, "loss": 0.0, "lr": 4.2247669082963065e-08, "epoch": 4.5822885046852315, "percentage": 91.65, "elapsed_time": "0:51:22", "remaining_time": "0:04:41", "throughput": 5543.31, "total_tokens": 17089808} +{"current_steps": 34725, "total_steps": 37885, "loss": 0.0, "lr": 4.211527657791891e-08, "epoch": 4.58294839646298, "percentage": 91.66, "elapsed_time": "0:51:23", "remaining_time": "0:04:40", "throughput": 5543.44, "total_tokens": 17092048} +{"current_steps": 34730, "total_steps": 37885, "loss": 0.0, "lr": 4.198308737697087e-08, "epoch": 4.583608288240729, "percentage": 91.67, "elapsed_time": "0:51:23", "remaining_time": "0:04:40", "throughput": 5543.65, "total_tokens": 17094544} +{"current_steps": 34735, "total_steps": 37885, "loss": 0.0001, "lr": 4.1851101508174834e-08, "epoch": 4.584268180018477, "percentage": 91.69, "elapsed_time": "0:51:23", "remaining_time": "0:04:39", "throughput": 5543.77, "total_tokens": 17096784} +{"current_steps": 34740, "total_steps": 37885, "loss": 0.0001, "lr": 4.171931899954439e-08, "epoch": 4.5849280717962255, "percentage": 91.7, "elapsed_time": "0:51:24", "remaining_time": "0:04:39", "throughput": 5543.94, "total_tokens": 17099152} +{"current_steps": 34745, "total_steps": 37885, "loss": 0.0, "lr": 4.1587739879049067e-08, "epoch": 4.585587963573974, "percentage": 91.71, "elapsed_time": "0:51:24", "remaining_time": "0:04:38", "throughput": 5544.03, "total_tokens": 17101264} +{"current_steps": 34750, "total_steps": 37885, "loss": 0.0, "lr": 4.145636417461573e-08, "epoch": 4.586247855351722, "percentage": 91.72, "elapsed_time": "0:51:24", "remaining_time": "0:04:38", "throughput": 5544.16, "total_tokens": 17103504} +{"current_steps": 34755, "total_steps": 37885, "loss": 0.0, "lr": 4.132519191412787e-08, "epoch": 4.586907747129471, "percentage": 91.74, "elapsed_time": "0:51:25", "remaining_time": "0:04:37", "throughput": 5544.3, "total_tokens": 17105744} +{"current_steps": 34760, "total_steps": 37885, "loss": 0.0176, "lr": 4.1194223125425753e-08, "epoch": 4.5875676389072195, "percentage": 91.75, "elapsed_time": "0:51:25", "remaining_time": "0:04:37", "throughput": 5544.53, "total_tokens": 17108304} +{"current_steps": 34765, "total_steps": 37885, "loss": 0.0, "lr": 4.1063457836306716e-08, "epoch": 4.588227530684968, "percentage": 91.76, "elapsed_time": "0:51:25", "remaining_time": "0:04:36", "throughput": 5544.66, "total_tokens": 17110544} +{"current_steps": 34770, "total_steps": 37885, "loss": 0.0, "lr": 4.0932896074524546e-08, "epoch": 4.588887422462716, "percentage": 91.78, "elapsed_time": "0:51:26", "remaining_time": "0:04:36", "throughput": 5544.89, "total_tokens": 17113104} +{"current_steps": 34775, "total_steps": 37885, "loss": 0.0226, "lr": 4.080253786779042e-08, "epoch": 4.589547314240464, "percentage": 91.79, "elapsed_time": "0:51:26", "remaining_time": "0:04:36", "throughput": 5545.07, "total_tokens": 17115472} +{"current_steps": 34780, "total_steps": 37885, "loss": 0.0033, "lr": 4.0672383243771643e-08, "epoch": 4.590207206018213, "percentage": 91.8, "elapsed_time": "0:51:26", "remaining_time": "0:04:35", "throughput": 5545.29, "total_tokens": 17118032} +{"current_steps": 34785, "total_steps": 37885, "loss": 0.0, "lr": 4.054243223009246e-08, "epoch": 4.590867097795962, "percentage": 91.82, "elapsed_time": "0:51:27", "remaining_time": "0:04:35", "throughput": 5545.53, "total_tokens": 17120592} +{"current_steps": 34790, "total_steps": 37885, "loss": 0.0, "lr": 4.041268485433413e-08, "epoch": 4.59152698957371, "percentage": 91.83, "elapsed_time": "0:51:27", "remaining_time": "0:04:34", "throughput": 5545.69, "total_tokens": 17122896} +{"current_steps": 34795, "total_steps": 37885, "loss": 0.0, "lr": 4.028314114403475e-08, "epoch": 4.592186881351458, "percentage": 91.84, "elapsed_time": "0:51:27", "remaining_time": "0:04:34", "throughput": 5545.92, "total_tokens": 17125456} +{"current_steps": 34800, "total_steps": 37885, "loss": 0.0, "lr": 4.015380112668909e-08, "epoch": 4.592846773129207, "percentage": 91.86, "elapsed_time": "0:51:28", "remaining_time": "0:04:33", "throughput": 5546.16, "total_tokens": 17128016} +{"current_steps": 34805, "total_steps": 37885, "loss": 0.0, "lr": 4.002466482974831e-08, "epoch": 4.593506664906955, "percentage": 91.87, "elapsed_time": "0:51:28", "remaining_time": "0:04:33", "throughput": 5546.38, "total_tokens": 17130512} +{"current_steps": 34810, "total_steps": 37885, "loss": 0.0364, "lr": 3.989573228062082e-08, "epoch": 4.594166556684704, "percentage": 91.88, "elapsed_time": "0:51:28", "remaining_time": "0:04:32", "throughput": 5546.57, "total_tokens": 17132944} +{"current_steps": 34815, "total_steps": 37885, "loss": 0.0, "lr": 3.976700350667173e-08, "epoch": 4.594826448462452, "percentage": 91.9, "elapsed_time": "0:51:29", "remaining_time": "0:04:32", "throughput": 5546.79, "total_tokens": 17135440} +{"current_steps": 34820, "total_steps": 37885, "loss": 0.0, "lr": 3.963847853522262e-08, "epoch": 4.595486340240201, "percentage": 91.91, "elapsed_time": "0:51:29", "remaining_time": "0:04:31", "throughput": 5546.99, "total_tokens": 17137872} +{"current_steps": 34825, "total_steps": 37885, "loss": 0.0, "lr": 3.951015739355201e-08, "epoch": 4.596146232017949, "percentage": 91.92, "elapsed_time": "0:51:29", "remaining_time": "0:04:31", "throughput": 5547.15, "total_tokens": 17140176} +{"current_steps": 34830, "total_steps": 37885, "loss": 0.0, "lr": 3.9382040108895344e-08, "epoch": 4.596806123795697, "percentage": 91.94, "elapsed_time": "0:51:30", "remaining_time": "0:04:31", "throughput": 5547.37, "total_tokens": 17142672} +{"current_steps": 34835, "total_steps": 37885, "loss": 0.0004, "lr": 3.925412670844419e-08, "epoch": 4.5974660155734455, "percentage": 91.95, "elapsed_time": "0:51:30", "remaining_time": "0:04:30", "throughput": 5547.6, "total_tokens": 17145232} +{"current_steps": 34840, "total_steps": 37885, "loss": 0.0, "lr": 3.9126417219347506e-08, "epoch": 4.598125907351195, "percentage": 91.96, "elapsed_time": "0:51:30", "remaining_time": "0:04:30", "throughput": 5547.79, "total_tokens": 17147600} +{"current_steps": 34845, "total_steps": 37885, "loss": 0.0, "lr": 3.899891166871072e-08, "epoch": 4.598785799128943, "percentage": 91.98, "elapsed_time": "0:51:31", "remaining_time": "0:04:29", "throughput": 5547.98, "total_tokens": 17150032} +{"current_steps": 34850, "total_steps": 37885, "loss": 0.0003, "lr": 3.8871610083595965e-08, "epoch": 4.599445690906691, "percentage": 91.99, "elapsed_time": "0:51:31", "remaining_time": "0:04:29", "throughput": 5548.16, "total_tokens": 17152400} +{"current_steps": 34855, "total_steps": 37885, "loss": 0.0214, "lr": 3.874451249102195e-08, "epoch": 4.6001055826844395, "percentage": 92.0, "elapsed_time": "0:51:31", "remaining_time": "0:04:28", "throughput": 5548.36, "total_tokens": 17154896} +{"current_steps": 34860, "total_steps": 37885, "loss": 0.0002, "lr": 3.861761891796433e-08, "epoch": 4.600765474462188, "percentage": 92.02, "elapsed_time": "0:51:32", "remaining_time": "0:04:28", "throughput": 5548.54, "total_tokens": 17157264} +{"current_steps": 34865, "total_steps": 37885, "loss": 0.0, "lr": 3.8490929391355345e-08, "epoch": 4.601425366239937, "percentage": 92.03, "elapsed_time": "0:51:32", "remaining_time": "0:04:27", "throughput": 5548.72, "total_tokens": 17159632} +{"current_steps": 34870, "total_steps": 37885, "loss": 0.0, "lr": 3.83644439380838e-08, "epoch": 4.602085258017685, "percentage": 92.04, "elapsed_time": "0:51:32", "remaining_time": "0:04:27", "throughput": 5548.9, "total_tokens": 17162000} +{"current_steps": 34875, "total_steps": 37885, "loss": 0.0, "lr": 3.823816258499546e-08, "epoch": 4.6027451497954335, "percentage": 92.05, "elapsed_time": "0:51:33", "remaining_time": "0:04:26", "throughput": 5549.06, "total_tokens": 17164304} +{"current_steps": 34880, "total_steps": 37885, "loss": 0.0105, "lr": 3.811208535889265e-08, "epoch": 4.603405041573182, "percentage": 92.07, "elapsed_time": "0:51:33", "remaining_time": "0:04:26", "throughput": 5549.33, "total_tokens": 17166992} +{"current_steps": 34885, "total_steps": 37885, "loss": 0.0411, "lr": 3.79862122865342e-08, "epoch": 4.60406493335093, "percentage": 92.08, "elapsed_time": "0:51:33", "remaining_time": "0:04:26", "throughput": 5549.56, "total_tokens": 17169552} +{"current_steps": 34890, "total_steps": 37885, "loss": 0.0, "lr": 3.786054339463596e-08, "epoch": 4.604724825128679, "percentage": 92.09, "elapsed_time": "0:51:34", "remaining_time": "0:04:25", "throughput": 5549.82, "total_tokens": 17172176} +{"current_steps": 34895, "total_steps": 37885, "loss": 0.0, "lr": 3.7735078709869804e-08, "epoch": 4.6053847169064275, "percentage": 92.11, "elapsed_time": "0:51:34", "remaining_time": "0:04:25", "throughput": 5549.96, "total_tokens": 17174416} +{"current_steps": 34900, "total_steps": 37885, "loss": 0.0, "lr": 3.760981825886533e-08, "epoch": 4.606044608684176, "percentage": 92.12, "elapsed_time": "0:51:34", "remaining_time": "0:04:24", "throughput": 5550.11, "total_tokens": 17176720} +{"current_steps": 34905, "total_steps": 37885, "loss": 0.0239, "lr": 3.748476206820783e-08, "epoch": 4.606704500461924, "percentage": 92.13, "elapsed_time": "0:51:35", "remaining_time": "0:04:24", "throughput": 5550.33, "total_tokens": 17179216} +{"current_steps": 34910, "total_steps": 37885, "loss": 0.0016, "lr": 3.735991016443929e-08, "epoch": 4.607364392239672, "percentage": 92.15, "elapsed_time": "0:51:35", "remaining_time": "0:04:23", "throughput": 5550.53, "total_tokens": 17181648} +{"current_steps": 34915, "total_steps": 37885, "loss": 0.0337, "lr": 3.723526257405929e-08, "epoch": 4.6080242840174215, "percentage": 92.16, "elapsed_time": "0:51:35", "remaining_time": "0:04:23", "throughput": 5550.79, "total_tokens": 17184272} +{"current_steps": 34920, "total_steps": 37885, "loss": 0.0, "lr": 3.711081932352278e-08, "epoch": 4.60868417579517, "percentage": 92.17, "elapsed_time": "0:51:36", "remaining_time": "0:04:22", "throughput": 5550.98, "total_tokens": 17186704} +{"current_steps": 34925, "total_steps": 37885, "loss": 0.0005, "lr": 3.698658043924241e-08, "epoch": 4.609344067572918, "percentage": 92.19, "elapsed_time": "0:51:36", "remaining_time": "0:04:22", "throughput": 5551.2, "total_tokens": 17189200} +{"current_steps": 34930, "total_steps": 37885, "loss": 0.0, "lr": 3.686254594758653e-08, "epoch": 4.610003959350666, "percentage": 92.2, "elapsed_time": "0:51:36", "remaining_time": "0:04:21", "throughput": 5551.47, "total_tokens": 17191888} +{"current_steps": 34935, "total_steps": 37885, "loss": 0.0, "lr": 3.673871587488076e-08, "epoch": 4.610663851128415, "percentage": 92.21, "elapsed_time": "0:51:37", "remaining_time": "0:04:21", "throughput": 5551.72, "total_tokens": 17194448} +{"current_steps": 34940, "total_steps": 37885, "loss": 0.028, "lr": 3.661509024740739e-08, "epoch": 4.611323742906164, "percentage": 92.23, "elapsed_time": "0:51:37", "remaining_time": "0:04:21", "throughput": 5551.99, "total_tokens": 17197136} +{"current_steps": 34945, "total_steps": 37885, "loss": 0.0, "lr": 3.6491669091404553e-08, "epoch": 4.611983634683912, "percentage": 92.24, "elapsed_time": "0:51:37", "remaining_time": "0:04:20", "throughput": 5552.16, "total_tokens": 17199504} +{"current_steps": 34950, "total_steps": 37885, "loss": 0.0, "lr": 3.636845243306785e-08, "epoch": 4.61264352646166, "percentage": 92.25, "elapsed_time": "0:51:38", "remaining_time": "0:04:20", "throughput": 5552.32, "total_tokens": 17201808} +{"current_steps": 34955, "total_steps": 37885, "loss": 0.0001, "lr": 3.624544029854914e-08, "epoch": 4.613303418239409, "percentage": 92.27, "elapsed_time": "0:51:38", "remaining_time": "0:04:19", "throughput": 5552.56, "total_tokens": 17204432} +{"current_steps": 34960, "total_steps": 37885, "loss": 0.0001, "lr": 3.6122632713956766e-08, "epoch": 4.613963310017157, "percentage": 92.28, "elapsed_time": "0:51:38", "remaining_time": "0:04:19", "throughput": 5552.77, "total_tokens": 17206928} +{"current_steps": 34965, "total_steps": 37885, "loss": 0.0001, "lr": 3.600002970535565e-08, "epoch": 4.614623201794906, "percentage": 92.29, "elapsed_time": "0:51:39", "remaining_time": "0:04:18", "throughput": 5553.02, "total_tokens": 17209552} +{"current_steps": 34970, "total_steps": 37885, "loss": 0.0, "lr": 3.587763129876753e-08, "epoch": 4.615283093572654, "percentage": 92.31, "elapsed_time": "0:51:39", "remaining_time": "0:04:18", "throughput": 5553.24, "total_tokens": 17212048} +{"current_steps": 34975, "total_steps": 37885, "loss": 0.0, "lr": 3.575543752017063e-08, "epoch": 4.615942985350403, "percentage": 92.32, "elapsed_time": "0:51:39", "remaining_time": "0:04:17", "throughput": 5553.33, "total_tokens": 17214160} +{"current_steps": 34980, "total_steps": 37885, "loss": 0.0003, "lr": 3.563344839549942e-08, "epoch": 4.616602877128151, "percentage": 92.33, "elapsed_time": "0:51:40", "remaining_time": "0:04:17", "throughput": 5553.55, "total_tokens": 17216656} +{"current_steps": 34985, "total_steps": 37885, "loss": 0.0001, "lr": 3.5511663950645534e-08, "epoch": 4.617262768905899, "percentage": 92.35, "elapsed_time": "0:51:40", "remaining_time": "0:04:17", "throughput": 5553.72, "total_tokens": 17219024} +{"current_steps": 34990, "total_steps": 37885, "loss": 0.0, "lr": 3.539008421145673e-08, "epoch": 4.617922660683648, "percentage": 92.36, "elapsed_time": "0:51:40", "remaining_time": "0:04:16", "throughput": 5553.97, "total_tokens": 17221648} +{"current_steps": 34995, "total_steps": 37885, "loss": 0.0001, "lr": 3.526870920373726e-08, "epoch": 4.618582552461397, "percentage": 92.37, "elapsed_time": "0:51:41", "remaining_time": "0:04:16", "throughput": 5554.13, "total_tokens": 17223952} +{"current_steps": 35000, "total_steps": 37885, "loss": 0.0, "lr": 3.514753895324829e-08, "epoch": 4.619242444239145, "percentage": 92.38, "elapsed_time": "0:51:41", "remaining_time": "0:04:15", "throughput": 5554.34, "total_tokens": 17226448} +{"current_steps": 35005, "total_steps": 37885, "loss": 0.0, "lr": 3.5026573485707253e-08, "epoch": 4.619902336016893, "percentage": 92.4, "elapsed_time": "0:51:41", "remaining_time": "0:04:15", "throughput": 5554.56, "total_tokens": 17228944} +{"current_steps": 35010, "total_steps": 37885, "loss": 0.0, "lr": 3.4905812826788285e-08, "epoch": 4.620562227794641, "percentage": 92.41, "elapsed_time": "0:51:42", "remaining_time": "0:04:14", "throughput": 5554.76, "total_tokens": 17231376} +{"current_steps": 35015, "total_steps": 37885, "loss": 0.0411, "lr": 3.478525700212176e-08, "epoch": 4.621222119572391, "percentage": 92.42, "elapsed_time": "0:51:42", "remaining_time": "0:04:14", "throughput": 5555.12, "total_tokens": 17234384} +{"current_steps": 35020, "total_steps": 37885, "loss": 0.004, "lr": 3.4664906037294996e-08, "epoch": 4.621882011350139, "percentage": 92.44, "elapsed_time": "0:51:42", "remaining_time": "0:04:13", "throughput": 5555.32, "total_tokens": 17236816} +{"current_steps": 35025, "total_steps": 37885, "loss": 0.0, "lr": 3.4544759957851553e-08, "epoch": 4.622541903127887, "percentage": 92.45, "elapsed_time": "0:51:43", "remaining_time": "0:04:13", "throughput": 5555.44, "total_tokens": 17238992} +{"current_steps": 35030, "total_steps": 37885, "loss": 0.0001, "lr": 3.4424818789291373e-08, "epoch": 4.623201794905635, "percentage": 92.46, "elapsed_time": "0:51:43", "remaining_time": "0:04:12", "throughput": 5555.69, "total_tokens": 17241616} +{"current_steps": 35035, "total_steps": 37885, "loss": 0.0, "lr": 3.4305082557071316e-08, "epoch": 4.623861686683384, "percentage": 92.48, "elapsed_time": "0:51:43", "remaining_time": "0:04:12", "throughput": 5555.81, "total_tokens": 17243792} +{"current_steps": 35040, "total_steps": 37885, "loss": 0.0, "lr": 3.418555128660461e-08, "epoch": 4.624521578461132, "percentage": 92.49, "elapsed_time": "0:51:44", "remaining_time": "0:04:12", "throughput": 5556.02, "total_tokens": 17246288} +{"current_steps": 35045, "total_steps": 37885, "loss": 0.0, "lr": 3.406622500326062e-08, "epoch": 4.625181470238881, "percentage": 92.5, "elapsed_time": "0:51:44", "remaining_time": "0:04:11", "throughput": 5556.31, "total_tokens": 17249040} +{"current_steps": 35050, "total_steps": 37885, "loss": 0.0018, "lr": 3.3947103732365646e-08, "epoch": 4.625841362016629, "percentage": 92.52, "elapsed_time": "0:51:44", "remaining_time": "0:04:11", "throughput": 5556.56, "total_tokens": 17251664} +{"current_steps": 35055, "total_steps": 37885, "loss": 0.0, "lr": 3.382818749920224e-08, "epoch": 4.626501253794378, "percentage": 92.53, "elapsed_time": "0:51:45", "remaining_time": "0:04:10", "throughput": 5556.77, "total_tokens": 17254160} +{"current_steps": 35060, "total_steps": 37885, "loss": 0.0239, "lr": 3.370947632900978e-08, "epoch": 4.627161145572126, "percentage": 92.54, "elapsed_time": "0:51:45", "remaining_time": "0:04:10", "throughput": 5557.03, "total_tokens": 17256784} +{"current_steps": 35065, "total_steps": 37885, "loss": 0.0, "lr": 3.3590970246983654e-08, "epoch": 4.627821037349874, "percentage": 92.56, "elapsed_time": "0:51:45", "remaining_time": "0:04:09", "throughput": 5557.18, "total_tokens": 17259088} +{"current_steps": 35070, "total_steps": 37885, "loss": 0.0, "lr": 3.3472669278275637e-08, "epoch": 4.628480929127623, "percentage": 92.57, "elapsed_time": "0:51:46", "remaining_time": "0:04:09", "throughput": 5557.41, "total_tokens": 17261648} +{"current_steps": 35075, "total_steps": 37885, "loss": 0.0035, "lr": 3.3354573447994637e-08, "epoch": 4.629140820905372, "percentage": 92.58, "elapsed_time": "0:51:46", "remaining_time": "0:04:08", "throughput": 5557.68, "total_tokens": 17264336} +{"current_steps": 35080, "total_steps": 37885, "loss": 0.0, "lr": 3.3236682781205616e-08, "epoch": 4.62980071268312, "percentage": 92.6, "elapsed_time": "0:51:46", "remaining_time": "0:04:08", "throughput": 5557.82, "total_tokens": 17266576} +{"current_steps": 35085, "total_steps": 37885, "loss": 0.0, "lr": 3.311899730292989e-08, "epoch": 4.630460604460868, "percentage": 92.61, "elapsed_time": "0:51:47", "remaining_time": "0:04:07", "throughput": 5558.0, "total_tokens": 17268944} +{"current_steps": 35090, "total_steps": 37885, "loss": 0.0, "lr": 3.3001517038145356e-08, "epoch": 4.6311204962386165, "percentage": 92.62, "elapsed_time": "0:51:47", "remaining_time": "0:04:07", "throughput": 5558.19, "total_tokens": 17271376} +{"current_steps": 35095, "total_steps": 37885, "loss": 0.0, "lr": 3.28842420117863e-08, "epoch": 4.631780388016365, "percentage": 92.64, "elapsed_time": "0:51:47", "remaining_time": "0:04:07", "throughput": 5558.38, "total_tokens": 17273808} +{"current_steps": 35100, "total_steps": 37885, "loss": 0.0, "lr": 3.27671722487437e-08, "epoch": 4.632440279794114, "percentage": 92.65, "elapsed_time": "0:51:48", "remaining_time": "0:04:06", "throughput": 5558.54, "total_tokens": 17276112} +{"current_steps": 35105, "total_steps": 37885, "loss": 0.0005, "lr": 3.265030777386446e-08, "epoch": 4.633100171571862, "percentage": 92.66, "elapsed_time": "0:51:48", "remaining_time": "0:04:06", "throughput": 5558.71, "total_tokens": 17278480} +{"current_steps": 35110, "total_steps": 37885, "loss": 0.0252, "lr": 3.2533648611952623e-08, "epoch": 4.6337600633496105, "percentage": 92.68, "elapsed_time": "0:51:48", "remaining_time": "0:04:05", "throughput": 5559.02, "total_tokens": 17281296} +{"current_steps": 35115, "total_steps": 37885, "loss": 0.0, "lr": 3.241719478776805e-08, "epoch": 4.634419955127359, "percentage": 92.69, "elapsed_time": "0:51:49", "remaining_time": "0:04:05", "throughput": 5559.29, "total_tokens": 17283984} +{"current_steps": 35120, "total_steps": 37885, "loss": 0.0, "lr": 3.230094632602698e-08, "epoch": 4.635079846905107, "percentage": 92.7, "elapsed_time": "0:51:49", "remaining_time": "0:04:04", "throughput": 5559.47, "total_tokens": 17286352} +{"current_steps": 35125, "total_steps": 37885, "loss": 0.0, "lr": 3.218490325140266e-08, "epoch": 4.635739738682856, "percentage": 92.71, "elapsed_time": "0:51:49", "remaining_time": "0:04:04", "throughput": 5559.74, "total_tokens": 17289040} +{"current_steps": 35130, "total_steps": 37885, "loss": 0.0, "lr": 3.206906558852418e-08, "epoch": 4.6363996304606045, "percentage": 92.73, "elapsed_time": "0:51:50", "remaining_time": "0:04:03", "throughput": 5559.95, "total_tokens": 17291536} +{"current_steps": 35135, "total_steps": 37885, "loss": 0.0, "lr": 3.195343336197742e-08, "epoch": 4.637059522238353, "percentage": 92.74, "elapsed_time": "0:51:50", "remaining_time": "0:04:03", "throughput": 5560.2, "total_tokens": 17294160} +{"current_steps": 35140, "total_steps": 37885, "loss": 0.0755, "lr": 3.183800659630431e-08, "epoch": 4.637719414016101, "percentage": 92.75, "elapsed_time": "0:51:50", "remaining_time": "0:04:02", "throughput": 5560.49, "total_tokens": 17296912} +{"current_steps": 35145, "total_steps": 37885, "loss": 0.0, "lr": 3.1722785316003475e-08, "epoch": 4.638379305793849, "percentage": 92.77, "elapsed_time": "0:51:51", "remaining_time": "0:04:02", "throughput": 5560.68, "total_tokens": 17299344} +{"current_steps": 35150, "total_steps": 37885, "loss": 0.0011, "lr": 3.160776954552979e-08, "epoch": 4.6390391975715985, "percentage": 92.78, "elapsed_time": "0:51:51", "remaining_time": "0:04:02", "throughput": 5560.9, "total_tokens": 17301840} +{"current_steps": 35155, "total_steps": 37885, "loss": 0.0518, "lr": 3.149295930929441e-08, "epoch": 4.639699089349347, "percentage": 92.79, "elapsed_time": "0:51:51", "remaining_time": "0:04:01", "throughput": 5561.1, "total_tokens": 17304336} +{"current_steps": 35160, "total_steps": 37885, "loss": 0.0016, "lr": 3.137835463166494e-08, "epoch": 4.640358981127095, "percentage": 92.81, "elapsed_time": "0:51:52", "remaining_time": "0:04:01", "throughput": 5561.3, "total_tokens": 17306768} +{"current_steps": 35165, "total_steps": 37885, "loss": 0.0305, "lr": 3.12639555369657e-08, "epoch": 4.641018872904843, "percentage": 92.82, "elapsed_time": "0:51:52", "remaining_time": "0:04:00", "throughput": 5561.37, "total_tokens": 17308816} +{"current_steps": 35170, "total_steps": 37885, "loss": 0.0226, "lr": 3.1149762049476724e-08, "epoch": 4.641678764682592, "percentage": 92.83, "elapsed_time": "0:51:52", "remaining_time": "0:04:00", "throughput": 5561.69, "total_tokens": 17311696} +{"current_steps": 35175, "total_steps": 37885, "loss": 0.0, "lr": 3.103577419343484e-08, "epoch": 4.642338656460341, "percentage": 92.85, "elapsed_time": "0:51:52", "remaining_time": "0:03:59", "throughput": 5561.84, "total_tokens": 17314000} +{"current_steps": 35180, "total_steps": 37885, "loss": 0.0, "lr": 3.092199199303325e-08, "epoch": 4.642998548238089, "percentage": 92.86, "elapsed_time": "0:51:53", "remaining_time": "0:03:59", "throughput": 5562.01, "total_tokens": 17316368} +{"current_steps": 35185, "total_steps": 37885, "loss": 0.0, "lr": 3.0808415472421413e-08, "epoch": 4.643658440015837, "percentage": 92.87, "elapsed_time": "0:51:53", "remaining_time": "0:03:58", "throughput": 5562.2, "total_tokens": 17318800} +{"current_steps": 35190, "total_steps": 37885, "loss": 0.036, "lr": 3.069504465570505e-08, "epoch": 4.644318331793586, "percentage": 92.89, "elapsed_time": "0:51:53", "remaining_time": "0:03:58", "throughput": 5562.42, "total_tokens": 17321296} +{"current_steps": 35195, "total_steps": 37885, "loss": 0.0, "lr": 3.0581879566946243e-08, "epoch": 4.644978223571334, "percentage": 92.9, "elapsed_time": "0:51:54", "remaining_time": "0:03:58", "throughput": 5562.53, "total_tokens": 17323472} +{"current_steps": 35200, "total_steps": 37885, "loss": 0.0, "lr": 3.046892023016356e-08, "epoch": 4.645638115349083, "percentage": 92.91, "elapsed_time": "0:51:54", "remaining_time": "0:03:57", "throughput": 5562.67, "total_tokens": 17325712} +{"current_steps": 35205, "total_steps": 37885, "loss": 0.0, "lr": 3.035616666933183e-08, "epoch": 4.646298007126831, "percentage": 92.93, "elapsed_time": "0:51:54", "remaining_time": "0:03:57", "throughput": 5562.96, "total_tokens": 17328464} +{"current_steps": 35210, "total_steps": 37885, "loss": 0.0, "lr": 3.024361890838201e-08, "epoch": 4.64695789890458, "percentage": 92.94, "elapsed_time": "0:51:55", "remaining_time": "0:03:56", "throughput": 5563.17, "total_tokens": 17330960} +{"current_steps": 35215, "total_steps": 37885, "loss": 0.0001, "lr": 3.013127697120166e-08, "epoch": 4.647617790682328, "percentage": 92.95, "elapsed_time": "0:51:55", "remaining_time": "0:03:56", "throughput": 5563.47, "total_tokens": 17333776} +{"current_steps": 35220, "total_steps": 37885, "loss": 0.0028, "lr": 3.00191408816346e-08, "epoch": 4.648277682460076, "percentage": 92.97, "elapsed_time": "0:51:55", "remaining_time": "0:03:55", "throughput": 5563.64, "total_tokens": 17336144} +{"current_steps": 35225, "total_steps": 37885, "loss": 0.0, "lr": 2.99072106634809e-08, "epoch": 4.648937574237825, "percentage": 92.98, "elapsed_time": "0:51:56", "remaining_time": "0:03:55", "throughput": 5563.86, "total_tokens": 17338640} +{"current_steps": 35230, "total_steps": 37885, "loss": 0.0, "lr": 2.9795486340496557e-08, "epoch": 4.649597466015574, "percentage": 92.99, "elapsed_time": "0:51:56", "remaining_time": "0:03:54", "throughput": 5564.06, "total_tokens": 17341136} +{"current_steps": 35235, "total_steps": 37885, "loss": 0.0, "lr": 2.968396793639494e-08, "epoch": 4.650257357793322, "percentage": 93.01, "elapsed_time": "0:51:56", "remaining_time": "0:03:54", "throughput": 5564.15, "total_tokens": 17343248} +{"current_steps": 35240, "total_steps": 37885, "loss": 0.0, "lr": 2.9572655474844555e-08, "epoch": 4.65091724957107, "percentage": 93.02, "elapsed_time": "0:51:57", "remaining_time": "0:03:53", "throughput": 5564.37, "total_tokens": 17345744} +{"current_steps": 35245, "total_steps": 37885, "loss": 0.0, "lr": 2.9461548979470507e-08, "epoch": 4.6515771413488185, "percentage": 93.03, "elapsed_time": "0:51:57", "remaining_time": "0:03:53", "throughput": 5564.63, "total_tokens": 17348432} +{"current_steps": 35250, "total_steps": 37885, "loss": 0.0001, "lr": 2.9350648473854933e-08, "epoch": 4.652237033126568, "percentage": 93.04, "elapsed_time": "0:51:57", "remaining_time": "0:03:53", "throughput": 5564.82, "total_tokens": 17350864} +{"current_steps": 35255, "total_steps": 37885, "loss": 0.0, "lr": 2.9239953981535116e-08, "epoch": 4.652896924904316, "percentage": 93.06, "elapsed_time": "0:51:58", "remaining_time": "0:03:52", "throughput": 5565.04, "total_tokens": 17353360} +{"current_steps": 35260, "total_steps": 37885, "loss": 0.0, "lr": 2.9129465526005592e-08, "epoch": 4.653556816682064, "percentage": 93.07, "elapsed_time": "0:51:58", "remaining_time": "0:03:52", "throughput": 5565.19, "total_tokens": 17355664} +{"current_steps": 35265, "total_steps": 37885, "loss": 0.0, "lr": 2.9019183130716386e-08, "epoch": 4.6542167084598125, "percentage": 93.08, "elapsed_time": "0:51:58", "remaining_time": "0:03:51", "throughput": 5565.44, "total_tokens": 17358288} +{"current_steps": 35270, "total_steps": 37885, "loss": 0.0032, "lr": 2.8909106819074214e-08, "epoch": 4.654876600237561, "percentage": 93.1, "elapsed_time": "0:51:59", "remaining_time": "0:03:51", "throughput": 5565.54, "total_tokens": 17360400} +{"current_steps": 35275, "total_steps": 37885, "loss": 0.0, "lr": 2.8799236614442168e-08, "epoch": 4.65553649201531, "percentage": 93.11, "elapsed_time": "0:51:59", "remaining_time": "0:03:50", "throughput": 5565.84, "total_tokens": 17363216} +{"current_steps": 35280, "total_steps": 37885, "loss": 0.0, "lr": 2.868957254013915e-08, "epoch": 4.656196383793058, "percentage": 93.12, "elapsed_time": "0:51:59", "remaining_time": "0:03:50", "throughput": 5566.01, "total_tokens": 17365584} +{"current_steps": 35285, "total_steps": 37885, "loss": 0.0, "lr": 2.8580114619440655e-08, "epoch": 4.6568562755708065, "percentage": 93.14, "elapsed_time": "0:52:00", "remaining_time": "0:03:49", "throughput": 5566.14, "total_tokens": 17367824} +{"current_steps": 35290, "total_steps": 37885, "loss": 0.0, "lr": 2.8470862875578427e-08, "epoch": 4.657516167348555, "percentage": 93.15, "elapsed_time": "0:52:00", "remaining_time": "0:03:49", "throughput": 5566.28, "total_tokens": 17370064} +{"current_steps": 35295, "total_steps": 37885, "loss": 0.0, "lr": 2.836181733174037e-08, "epoch": 4.658176059126303, "percentage": 93.16, "elapsed_time": "0:52:00", "remaining_time": "0:03:49", "throughput": 5566.53, "total_tokens": 17372688} +{"current_steps": 35300, "total_steps": 37885, "loss": 0.0, "lr": 2.8252978011070404e-08, "epoch": 4.658835950904052, "percentage": 93.18, "elapsed_time": "0:52:01", "remaining_time": "0:03:48", "throughput": 5566.78, "total_tokens": 17375312} +{"current_steps": 35305, "total_steps": 37885, "loss": 0.0, "lr": 2.8144344936669062e-08, "epoch": 4.6594958426818005, "percentage": 93.19, "elapsed_time": "0:52:01", "remaining_time": "0:03:48", "throughput": 5566.98, "total_tokens": 17377744} +{"current_steps": 35310, "total_steps": 37885, "loss": 0.0, "lr": 2.8035918131592895e-08, "epoch": 4.660155734459549, "percentage": 93.2, "elapsed_time": "0:52:01", "remaining_time": "0:03:47", "throughput": 5567.18, "total_tokens": 17380240} +{"current_steps": 35315, "total_steps": 37885, "loss": 0.0132, "lr": 2.792769761885472e-08, "epoch": 4.660815626237297, "percentage": 93.22, "elapsed_time": "0:52:02", "remaining_time": "0:03:47", "throughput": 5567.35, "total_tokens": 17382608} +{"current_steps": 35320, "total_steps": 37885, "loss": 0.0, "lr": 2.781968342142349e-08, "epoch": 4.661475518015045, "percentage": 93.23, "elapsed_time": "0:52:02", "remaining_time": "0:03:46", "throughput": 5567.56, "total_tokens": 17385104} +{"current_steps": 35325, "total_steps": 37885, "loss": 0.0, "lr": 2.771187556222454e-08, "epoch": 4.662135409792794, "percentage": 93.24, "elapsed_time": "0:52:02", "remaining_time": "0:03:46", "throughput": 5567.8, "total_tokens": 17387664} +{"current_steps": 35330, "total_steps": 37885, "loss": 0.0011, "lr": 2.7604274064139123e-08, "epoch": 4.662795301570543, "percentage": 93.26, "elapsed_time": "0:52:03", "remaining_time": "0:03:45", "throughput": 5567.87, "total_tokens": 17389712} +{"current_steps": 35335, "total_steps": 37885, "loss": 0.0, "lr": 2.7496878950005077e-08, "epoch": 4.663455193348291, "percentage": 93.27, "elapsed_time": "0:52:03", "remaining_time": "0:03:45", "throughput": 5568.13, "total_tokens": 17392400} +{"current_steps": 35340, "total_steps": 37885, "loss": 0.0, "lr": 2.738969024261606e-08, "epoch": 4.664115085126039, "percentage": 93.28, "elapsed_time": "0:52:03", "remaining_time": "0:03:44", "throughput": 5568.4, "total_tokens": 17395088} +{"current_steps": 35345, "total_steps": 37885, "loss": 0.0, "lr": 2.7282707964722427e-08, "epoch": 4.664774976903788, "percentage": 93.3, "elapsed_time": "0:52:04", "remaining_time": "0:03:44", "throughput": 5568.6, "total_tokens": 17397520} +{"current_steps": 35350, "total_steps": 37885, "loss": 0.0008, "lr": 2.7175932139030022e-08, "epoch": 4.665434868681536, "percentage": 93.31, "elapsed_time": "0:52:04", "remaining_time": "0:03:44", "throughput": 5568.75, "total_tokens": 17399824} +{"current_steps": 35355, "total_steps": 37885, "loss": 0.0, "lr": 2.7069362788201267e-08, "epoch": 4.666094760459285, "percentage": 93.32, "elapsed_time": "0:52:04", "remaining_time": "0:03:43", "throughput": 5568.98, "total_tokens": 17402384} +{"current_steps": 35360, "total_steps": 37885, "loss": 0.0039, "lr": 2.6962999934855068e-08, "epoch": 4.666754652237033, "percentage": 93.34, "elapsed_time": "0:52:05", "remaining_time": "0:03:43", "throughput": 5569.15, "total_tokens": 17404752} +{"current_steps": 35365, "total_steps": 37885, "loss": 0.0, "lr": 2.6856843601565816e-08, "epoch": 4.667414544014782, "percentage": 93.35, "elapsed_time": "0:52:05", "remaining_time": "0:03:42", "throughput": 5569.34, "total_tokens": 17407184} +{"current_steps": 35370, "total_steps": 37885, "loss": 0.0, "lr": 2.6750893810864596e-08, "epoch": 4.66807443579253, "percentage": 93.36, "elapsed_time": "0:52:05", "remaining_time": "0:03:42", "throughput": 5569.56, "total_tokens": 17409680} +{"current_steps": 35375, "total_steps": 37885, "loss": 0.0381, "lr": 2.6645150585238528e-08, "epoch": 4.668734327570278, "percentage": 93.37, "elapsed_time": "0:52:06", "remaining_time": "0:03:41", "throughput": 5569.81, "total_tokens": 17412304} +{"current_steps": 35380, "total_steps": 37885, "loss": 0.0005, "lr": 2.653961394713067e-08, "epoch": 4.6693942193480265, "percentage": 93.39, "elapsed_time": "0:52:06", "remaining_time": "0:03:41", "throughput": 5570.0, "total_tokens": 17414736} +{"current_steps": 35385, "total_steps": 37885, "loss": 0.0002, "lr": 2.6434283918940424e-08, "epoch": 4.670054111125776, "percentage": 93.4, "elapsed_time": "0:52:06", "remaining_time": "0:03:40", "throughput": 5570.21, "total_tokens": 17417232} +{"current_steps": 35390, "total_steps": 37885, "loss": 0.0294, "lr": 2.6329160523023587e-08, "epoch": 4.670714002903524, "percentage": 93.41, "elapsed_time": "0:52:07", "remaining_time": "0:03:40", "throughput": 5570.57, "total_tokens": 17420240} +{"current_steps": 35395, "total_steps": 37885, "loss": 0.0, "lr": 2.6224243781691636e-08, "epoch": 4.671373894681272, "percentage": 93.43, "elapsed_time": "0:52:07", "remaining_time": "0:03:40", "throughput": 5570.83, "total_tokens": 17422928} +{"current_steps": 35400, "total_steps": 37885, "loss": 0.0, "lr": 2.6119533717212428e-08, "epoch": 4.6720337864590205, "percentage": 93.44, "elapsed_time": "0:52:07", "remaining_time": "0:03:39", "throughput": 5571.04, "total_tokens": 17425424} +{"current_steps": 35405, "total_steps": 37885, "loss": 0.0, "lr": 2.601503035180963e-08, "epoch": 4.672693678236769, "percentage": 93.45, "elapsed_time": "0:52:08", "remaining_time": "0:03:39", "throughput": 5571.25, "total_tokens": 17427920} +{"current_steps": 35410, "total_steps": 37885, "loss": 0.0, "lr": 2.5910733707663947e-08, "epoch": 4.673353570014518, "percentage": 93.47, "elapsed_time": "0:52:08", "remaining_time": "0:03:38", "throughput": 5571.47, "total_tokens": 17430416} +{"current_steps": 35415, "total_steps": 37885, "loss": 0.0, "lr": 2.5806643806910998e-08, "epoch": 4.674013461792266, "percentage": 93.48, "elapsed_time": "0:52:08", "remaining_time": "0:03:38", "throughput": 5571.65, "total_tokens": 17432784} +{"current_steps": 35420, "total_steps": 37885, "loss": 0.0, "lr": 2.5702760671643455e-08, "epoch": 4.6746733535700145, "percentage": 93.49, "elapsed_time": "0:52:09", "remaining_time": "0:03:37", "throughput": 5571.74, "total_tokens": 17434896} +{"current_steps": 35425, "total_steps": 37885, "loss": 0.0, "lr": 2.559908432390967e-08, "epoch": 4.675333245347763, "percentage": 93.51, "elapsed_time": "0:52:09", "remaining_time": "0:03:37", "throughput": 5571.9, "total_tokens": 17437200} +{"current_steps": 35430, "total_steps": 37885, "loss": 0.0219, "lr": 2.5495614785714047e-08, "epoch": 4.675993137125511, "percentage": 93.52, "elapsed_time": "0:52:09", "remaining_time": "0:03:36", "throughput": 5572.05, "total_tokens": 17439504} +{"current_steps": 35435, "total_steps": 37885, "loss": 0.0, "lr": 2.5392352079017576e-08, "epoch": 4.67665302890326, "percentage": 93.53, "elapsed_time": "0:52:10", "remaining_time": "0:03:36", "throughput": 5572.25, "total_tokens": 17442000} +{"current_steps": 35440, "total_steps": 37885, "loss": 0.0, "lr": 2.528929622573661e-08, "epoch": 4.6773129206810085, "percentage": 93.55, "elapsed_time": "0:52:10", "remaining_time": "0:03:35", "throughput": 5572.39, "total_tokens": 17444240} +{"current_steps": 35445, "total_steps": 37885, "loss": 0.0, "lr": 2.5186447247744436e-08, "epoch": 4.677972812458757, "percentage": 93.56, "elapsed_time": "0:52:10", "remaining_time": "0:03:35", "throughput": 5572.58, "total_tokens": 17446672} +{"current_steps": 35450, "total_steps": 37885, "loss": 0.0, "lr": 2.5083805166869698e-08, "epoch": 4.678632704236505, "percentage": 93.57, "elapsed_time": "0:52:11", "remaining_time": "0:03:35", "throughput": 5572.81, "total_tokens": 17449232} +{"current_steps": 35455, "total_steps": 37885, "loss": 0.0657, "lr": 2.4981370004897527e-08, "epoch": 4.679292596014253, "percentage": 93.59, "elapsed_time": "0:52:11", "remaining_time": "0:03:34", "throughput": 5573.08, "total_tokens": 17451920} +{"current_steps": 35460, "total_steps": 37885, "loss": 0.0, "lr": 2.487914178356898e-08, "epoch": 4.6799524877920025, "percentage": 93.6, "elapsed_time": "0:52:11", "remaining_time": "0:03:34", "throughput": 5573.23, "total_tokens": 17454224} +{"current_steps": 35465, "total_steps": 37885, "loss": 0.0, "lr": 2.4777120524581364e-08, "epoch": 4.680612379569751, "percentage": 93.61, "elapsed_time": "0:52:12", "remaining_time": "0:03:33", "throughput": 5573.46, "total_tokens": 17456784} +{"current_steps": 35470, "total_steps": 37885, "loss": 0.0487, "lr": 2.4675306249587912e-08, "epoch": 4.681272271347499, "percentage": 93.63, "elapsed_time": "0:52:12", "remaining_time": "0:03:33", "throughput": 5573.61, "total_tokens": 17459088} +{"current_steps": 35475, "total_steps": 37885, "loss": 0.0411, "lr": 2.45736989801979e-08, "epoch": 4.681932163125247, "percentage": 93.64, "elapsed_time": "0:52:12", "remaining_time": "0:03:32", "throughput": 5573.78, "total_tokens": 17461456} +{"current_steps": 35480, "total_steps": 37885, "loss": 0.0, "lr": 2.4472298737976848e-08, "epoch": 4.682592054902996, "percentage": 93.65, "elapsed_time": "0:52:13", "remaining_time": "0:03:32", "throughput": 5573.93, "total_tokens": 17463760} +{"current_steps": 35485, "total_steps": 37885, "loss": 0.0595, "lr": 2.4371105544446323e-08, "epoch": 4.683251946680745, "percentage": 93.67, "elapsed_time": "0:52:13", "remaining_time": "0:03:31", "throughput": 5574.1, "total_tokens": 17466128} +{"current_steps": 35490, "total_steps": 37885, "loss": 0.0002, "lr": 2.427011942108348e-08, "epoch": 4.683911838458493, "percentage": 93.68, "elapsed_time": "0:52:13", "remaining_time": "0:03:31", "throughput": 5574.31, "total_tokens": 17468624} +{"current_steps": 35495, "total_steps": 37885, "loss": 0.0595, "lr": 2.416934038932217e-08, "epoch": 4.684571730236241, "percentage": 93.69, "elapsed_time": "0:52:14", "remaining_time": "0:03:31", "throughput": 5574.46, "total_tokens": 17470928} +{"current_steps": 35500, "total_steps": 37885, "loss": 0.0095, "lr": 2.406876847055206e-08, "epoch": 4.68523162201399, "percentage": 93.7, "elapsed_time": "0:52:14", "remaining_time": "0:03:30", "throughput": 5574.71, "total_tokens": 17473552} +{"current_steps": 35505, "total_steps": 37885, "loss": 0.0, "lr": 2.396840368611852e-08, "epoch": 4.685891513791738, "percentage": 93.72, "elapsed_time": "0:52:14", "remaining_time": "0:03:30", "throughput": 5574.78, "total_tokens": 17475600} +{"current_steps": 35510, "total_steps": 37885, "loss": 0.001, "lr": 2.3868246057323515e-08, "epoch": 4.686551405569487, "percentage": 93.73, "elapsed_time": "0:52:15", "remaining_time": "0:03:29", "throughput": 5575.03, "total_tokens": 17478224} +{"current_steps": 35515, "total_steps": 37885, "loss": 0.1067, "lr": 2.3768295605424703e-08, "epoch": 4.687211297347235, "percentage": 93.74, "elapsed_time": "0:52:15", "remaining_time": "0:03:29", "throughput": 5575.22, "total_tokens": 17480656} +{"current_steps": 35520, "total_steps": 37885, "loss": 0.0, "lr": 2.3668552351635896e-08, "epoch": 4.687871189124984, "percentage": 93.76, "elapsed_time": "0:52:15", "remaining_time": "0:03:28", "throughput": 5575.37, "total_tokens": 17482960} +{"current_steps": 35525, "total_steps": 37885, "loss": 0.0, "lr": 2.356901631712671e-08, "epoch": 4.688531080902732, "percentage": 93.77, "elapsed_time": "0:52:16", "remaining_time": "0:03:28", "throughput": 5575.64, "total_tokens": 17485648} +{"current_steps": 35530, "total_steps": 37885, "loss": 0.0518, "lr": 2.346968752302303e-08, "epoch": 4.68919097268048, "percentage": 93.78, "elapsed_time": "0:52:16", "remaining_time": "0:03:27", "throughput": 5575.86, "total_tokens": 17488208} +{"current_steps": 35535, "total_steps": 37885, "loss": 0.0, "lr": 2.3370565990406877e-08, "epoch": 4.689850864458229, "percentage": 93.8, "elapsed_time": "0:52:16", "remaining_time": "0:03:27", "throughput": 5576.07, "total_tokens": 17490704} +{"current_steps": 35540, "total_steps": 37885, "loss": 0.0, "lr": 2.3271651740315755e-08, "epoch": 4.690510756235978, "percentage": 93.81, "elapsed_time": "0:52:17", "remaining_time": "0:03:26", "throughput": 5576.23, "total_tokens": 17493008} +{"current_steps": 35545, "total_steps": 37885, "loss": 0.0188, "lr": 2.3172944793743653e-08, "epoch": 4.691170648013726, "percentage": 93.82, "elapsed_time": "0:52:17", "remaining_time": "0:03:26", "throughput": 5576.4, "total_tokens": 17495376} +{"current_steps": 35550, "total_steps": 37885, "loss": 0.0, "lr": 2.3074445171640366e-08, "epoch": 4.691830539791474, "percentage": 93.84, "elapsed_time": "0:52:17", "remaining_time": "0:03:26", "throughput": 5576.53, "total_tokens": 17497616} +{"current_steps": 35555, "total_steps": 37885, "loss": 0.0252, "lr": 2.2976152894911838e-08, "epoch": 4.692490431569222, "percentage": 93.85, "elapsed_time": "0:52:18", "remaining_time": "0:03:25", "throughput": 5576.82, "total_tokens": 17500368} +{"current_steps": 35560, "total_steps": 37885, "loss": 0.0261, "lr": 2.2878067984419825e-08, "epoch": 4.693150323346972, "percentage": 93.86, "elapsed_time": "0:52:18", "remaining_time": "0:03:25", "throughput": 5576.99, "total_tokens": 17502736} +{"current_steps": 35565, "total_steps": 37885, "loss": 0.0, "lr": 2.2780190460981896e-08, "epoch": 4.69381021512472, "percentage": 93.88, "elapsed_time": "0:52:18", "remaining_time": "0:03:24", "throughput": 5577.2, "total_tokens": 17505232} +{"current_steps": 35570, "total_steps": 37885, "loss": 0.0, "lr": 2.2682520345372325e-08, "epoch": 4.694470106902468, "percentage": 93.89, "elapsed_time": "0:52:19", "remaining_time": "0:03:24", "throughput": 5577.37, "total_tokens": 17507600} +{"current_steps": 35575, "total_steps": 37885, "loss": 0.0003, "lr": 2.258505765832064e-08, "epoch": 4.695129998680216, "percentage": 93.9, "elapsed_time": "0:52:19", "remaining_time": "0:03:23", "throughput": 5577.56, "total_tokens": 17510032} +{"current_steps": 35580, "total_steps": 37885, "loss": 0.0001, "lr": 2.248780242051229e-08, "epoch": 4.695789890457965, "percentage": 93.92, "elapsed_time": "0:52:19", "remaining_time": "0:03:23", "throughput": 5577.86, "total_tokens": 17512848} +{"current_steps": 35585, "total_steps": 37885, "loss": 0.0, "lr": 2.239075465258966e-08, "epoch": 4.696449782235713, "percentage": 93.93, "elapsed_time": "0:52:20", "remaining_time": "0:03:22", "throughput": 5578.07, "total_tokens": 17515344} +{"current_steps": 35590, "total_steps": 37885, "loss": 0.0, "lr": 2.2293914375149824e-08, "epoch": 4.697109674013462, "percentage": 93.94, "elapsed_time": "0:52:20", "remaining_time": "0:03:22", "throughput": 5578.26, "total_tokens": 17517776} +{"current_steps": 35595, "total_steps": 37885, "loss": 0.0, "lr": 2.2197281608746787e-08, "epoch": 4.69776956579121, "percentage": 93.96, "elapsed_time": "0:52:20", "remaining_time": "0:03:22", "throughput": 5578.46, "total_tokens": 17520272} +{"current_steps": 35600, "total_steps": 37885, "loss": 0.0062, "lr": 2.210085637388992e-08, "epoch": 4.698429457568959, "percentage": 93.97, "elapsed_time": "0:52:21", "remaining_time": "0:03:21", "throughput": 5578.6, "total_tokens": 17522512} +{"current_steps": 35605, "total_steps": 37885, "loss": 0.0, "lr": 2.2004638691044962e-08, "epoch": 4.699089349346707, "percentage": 93.98, "elapsed_time": "0:52:21", "remaining_time": "0:03:21", "throughput": 5578.88, "total_tokens": 17525264} +{"current_steps": 35610, "total_steps": 37885, "loss": 0.0338, "lr": 2.190862858063347e-08, "epoch": 4.699749241124455, "percentage": 93.99, "elapsed_time": "0:52:21", "remaining_time": "0:03:20", "throughput": 5579.03, "total_tokens": 17527568} +{"current_steps": 35615, "total_steps": 37885, "loss": 0.0, "lr": 2.1812826063032584e-08, "epoch": 4.700409132902204, "percentage": 94.01, "elapsed_time": "0:52:22", "remaining_time": "0:03:20", "throughput": 5579.24, "total_tokens": 17530064} +{"current_steps": 35620, "total_steps": 37885, "loss": 0.0, "lr": 2.1717231158576045e-08, "epoch": 4.701069024679953, "percentage": 94.02, "elapsed_time": "0:52:22", "remaining_time": "0:03:19", "throughput": 5579.48, "total_tokens": 17532688} +{"current_steps": 35625, "total_steps": 37885, "loss": 0.0, "lr": 2.1621843887552948e-08, "epoch": 4.701728916457701, "percentage": 94.03, "elapsed_time": "0:52:22", "remaining_time": "0:03:19", "throughput": 5579.68, "total_tokens": 17535120} +{"current_steps": 35630, "total_steps": 37885, "loss": 0.0, "lr": 2.1526664270208662e-08, "epoch": 4.702388808235449, "percentage": 94.05, "elapsed_time": "0:52:23", "remaining_time": "0:03:18", "throughput": 5579.85, "total_tokens": 17537488} +{"current_steps": 35635, "total_steps": 37885, "loss": 0.0, "lr": 2.1431692326744244e-08, "epoch": 4.7030487000131975, "percentage": 94.06, "elapsed_time": "0:52:23", "remaining_time": "0:03:18", "throughput": 5580.11, "total_tokens": 17540176} +{"current_steps": 35640, "total_steps": 37885, "loss": 0.0, "lr": 2.1336928077317017e-08, "epoch": 4.703708591790946, "percentage": 94.07, "elapsed_time": "0:52:23", "remaining_time": "0:03:18", "throughput": 5580.32, "total_tokens": 17542672} +{"current_steps": 35645, "total_steps": 37885, "loss": 0.0016, "lr": 2.1242371542039893e-08, "epoch": 4.704368483568695, "percentage": 94.09, "elapsed_time": "0:52:23", "remaining_time": "0:03:17", "throughput": 5580.43, "total_tokens": 17544848} +{"current_steps": 35650, "total_steps": 37885, "loss": 0.0, "lr": 2.1148022740981708e-08, "epoch": 4.705028375346443, "percentage": 94.1, "elapsed_time": "0:52:24", "remaining_time": "0:03:17", "throughput": 5580.64, "total_tokens": 17547344} +{"current_steps": 35655, "total_steps": 37885, "loss": 0.0, "lr": 2.1053881694167442e-08, "epoch": 4.7056882671241915, "percentage": 94.11, "elapsed_time": "0:52:24", "remaining_time": "0:03:16", "throughput": 5580.88, "total_tokens": 17549968} +{"current_steps": 35660, "total_steps": 37885, "loss": 0.02, "lr": 2.095994842157789e-08, "epoch": 4.70634815890194, "percentage": 94.13, "elapsed_time": "0:52:24", "remaining_time": "0:03:16", "throughput": 5581.03, "total_tokens": 17552272} +{"current_steps": 35665, "total_steps": 37885, "loss": 0.0002, "lr": 2.086622294314955e-08, "epoch": 4.707008050679688, "percentage": 94.14, "elapsed_time": "0:52:25", "remaining_time": "0:03:15", "throughput": 5581.24, "total_tokens": 17554768} +{"current_steps": 35670, "total_steps": 37885, "loss": 0.0, "lr": 2.077270527877495e-08, "epoch": 4.707667942457437, "percentage": 94.15, "elapsed_time": "0:52:25", "remaining_time": "0:03:15", "throughput": 5581.41, "total_tokens": 17557136} +{"current_steps": 35675, "total_steps": 37885, "loss": 0.0164, "lr": 2.067939544830277e-08, "epoch": 4.7083278342351855, "percentage": 94.17, "elapsed_time": "0:52:25", "remaining_time": "0:03:14", "throughput": 5581.64, "total_tokens": 17559696} +{"current_steps": 35680, "total_steps": 37885, "loss": 0.0, "lr": 2.0586293471537287e-08, "epoch": 4.708987726012934, "percentage": 94.18, "elapsed_time": "0:52:26", "remaining_time": "0:03:14", "throughput": 5581.83, "total_tokens": 17562128} +{"current_steps": 35685, "total_steps": 37885, "loss": 0.0295, "lr": 2.0493399368238573e-08, "epoch": 4.709647617790682, "percentage": 94.19, "elapsed_time": "0:52:26", "remaining_time": "0:03:13", "throughput": 5582.18, "total_tokens": 17565136} +{"current_steps": 35690, "total_steps": 37885, "loss": 0.0001, "lr": 2.0400713158122863e-08, "epoch": 4.71030750956843, "percentage": 94.21, "elapsed_time": "0:52:26", "remaining_time": "0:03:13", "throughput": 5582.48, "total_tokens": 17567952} +{"current_steps": 35695, "total_steps": 37885, "loss": 0.0, "lr": 2.0308234860862084e-08, "epoch": 4.7109674013461795, "percentage": 94.22, "elapsed_time": "0:52:27", "remaining_time": "0:03:13", "throughput": 5582.62, "total_tokens": 17570256} +{"current_steps": 35700, "total_steps": 37885, "loss": 0.0, "lr": 2.021596449608409e-08, "epoch": 4.711627293123928, "percentage": 94.23, "elapsed_time": "0:52:27", "remaining_time": "0:03:12", "throughput": 5582.79, "total_tokens": 17572624} +{"current_steps": 35705, "total_steps": 37885, "loss": 0.0, "lr": 2.0123902083372557e-08, "epoch": 4.712287184901676, "percentage": 94.25, "elapsed_time": "0:52:27", "remaining_time": "0:03:12", "throughput": 5582.9, "total_tokens": 17574800} +{"current_steps": 35710, "total_steps": 37885, "loss": 0.0766, "lr": 2.003204764226718e-08, "epoch": 4.712947076679424, "percentage": 94.26, "elapsed_time": "0:52:28", "remaining_time": "0:03:11", "throughput": 5583.13, "total_tokens": 17577360} +{"current_steps": 35715, "total_steps": 37885, "loss": 0.0, "lr": 1.9940401192263146e-08, "epoch": 4.713606968457173, "percentage": 94.27, "elapsed_time": "0:52:28", "remaining_time": "0:03:11", "throughput": 5583.33, "total_tokens": 17579856} +{"current_steps": 35720, "total_steps": 37885, "loss": 0.0, "lr": 1.9848962752812006e-08, "epoch": 4.714266860234922, "percentage": 94.29, "elapsed_time": "0:52:28", "remaining_time": "0:03:10", "throughput": 5583.5, "total_tokens": 17582224} +{"current_steps": 35725, "total_steps": 37885, "loss": 0.0, "lr": 1.9757732343320898e-08, "epoch": 4.71492675201267, "percentage": 94.3, "elapsed_time": "0:52:29", "remaining_time": "0:03:10", "throughput": 5583.69, "total_tokens": 17584656} +{"current_steps": 35730, "total_steps": 37885, "loss": 0.0, "lr": 1.9666709983152674e-08, "epoch": 4.715586643790418, "percentage": 94.31, "elapsed_time": "0:52:29", "remaining_time": "0:03:09", "throughput": 5583.91, "total_tokens": 17587152} +{"current_steps": 35735, "total_steps": 37885, "loss": 0.0, "lr": 1.957589569162632e-08, "epoch": 4.716246535568167, "percentage": 94.32, "elapsed_time": "0:52:29", "remaining_time": "0:03:09", "throughput": 5584.08, "total_tokens": 17589520} +{"current_steps": 35740, "total_steps": 37885, "loss": 0.0579, "lr": 1.948528948801631e-08, "epoch": 4.716906427345915, "percentage": 94.34, "elapsed_time": "0:52:30", "remaining_time": "0:03:09", "throughput": 5584.23, "total_tokens": 17591824} +{"current_steps": 35745, "total_steps": 37885, "loss": 0.0, "lr": 1.939489139155337e-08, "epoch": 4.717566319123664, "percentage": 94.35, "elapsed_time": "0:52:30", "remaining_time": "0:03:08", "throughput": 5584.49, "total_tokens": 17594512} +{"current_steps": 35750, "total_steps": 37885, "loss": 0.0, "lr": 1.9304701421423707e-08, "epoch": 4.718226210901412, "percentage": 94.36, "elapsed_time": "0:52:30", "remaining_time": "0:03:08", "throughput": 5584.66, "total_tokens": 17596880} +{"current_steps": 35755, "total_steps": 37885, "loss": 0.0, "lr": 1.921471959676957e-08, "epoch": 4.718886102679161, "percentage": 94.38, "elapsed_time": "0:52:31", "remaining_time": "0:03:07", "throughput": 5584.9, "total_tokens": 17599504} +{"current_steps": 35760, "total_steps": 37885, "loss": 0.02, "lr": 1.9124945936688896e-08, "epoch": 4.719545994456909, "percentage": 94.39, "elapsed_time": "0:52:31", "remaining_time": "0:03:07", "throughput": 5585.07, "total_tokens": 17601872} +{"current_steps": 35765, "total_steps": 37885, "loss": 0.0016, "lr": 1.903538046023545e-08, "epoch": 4.720205886234657, "percentage": 94.4, "elapsed_time": "0:52:31", "remaining_time": "0:03:06", "throughput": 5585.33, "total_tokens": 17604560} +{"current_steps": 35770, "total_steps": 37885, "loss": 0.0001, "lr": 1.8946023186419025e-08, "epoch": 4.720865778012406, "percentage": 94.42, "elapsed_time": "0:52:32", "remaining_time": "0:03:06", "throughput": 5585.44, "total_tokens": 17606736} +{"current_steps": 35775, "total_steps": 37885, "loss": 0.0, "lr": 1.885687413420478e-08, "epoch": 4.721525669790155, "percentage": 94.43, "elapsed_time": "0:52:32", "remaining_time": "0:03:05", "throughput": 5585.68, "total_tokens": 17609360} +{"current_steps": 35780, "total_steps": 37885, "loss": 0.0, "lr": 1.876793332251425e-08, "epoch": 4.722185561567903, "percentage": 94.44, "elapsed_time": "0:52:32", "remaining_time": "0:03:05", "throughput": 5585.81, "total_tokens": 17611600} +{"current_steps": 35785, "total_steps": 37885, "loss": 0.0, "lr": 1.8679200770224445e-08, "epoch": 4.722845453345651, "percentage": 94.46, "elapsed_time": "0:52:33", "remaining_time": "0:03:05", "throughput": 5586.05, "total_tokens": 17614224} +{"current_steps": 35790, "total_steps": 37885, "loss": 0.1016, "lr": 1.859067649616797e-08, "epoch": 4.7235053451233995, "percentage": 94.47, "elapsed_time": "0:52:33", "remaining_time": "0:03:04", "throughput": 5586.24, "total_tokens": 17616656} +{"current_steps": 35795, "total_steps": 37885, "loss": 0.0009, "lr": 1.8502360519133564e-08, "epoch": 4.724165236901149, "percentage": 94.48, "elapsed_time": "0:52:33", "remaining_time": "0:03:04", "throughput": 5586.57, "total_tokens": 17619600} +{"current_steps": 35800, "total_steps": 37885, "loss": 0.0, "lr": 1.8414252857865688e-08, "epoch": 4.724825128678897, "percentage": 94.5, "elapsed_time": "0:52:34", "remaining_time": "0:03:03", "throughput": 5586.8, "total_tokens": 17622160} +{"current_steps": 35805, "total_steps": 37885, "loss": 0.0, "lr": 1.8326353531064708e-08, "epoch": 4.725485020456645, "percentage": 94.51, "elapsed_time": "0:52:34", "remaining_time": "0:03:03", "throughput": 5587.02, "total_tokens": 17624720} +{"current_steps": 35810, "total_steps": 37885, "loss": 0.0, "lr": 1.8238662557386262e-08, "epoch": 4.7261449122343935, "percentage": 94.52, "elapsed_time": "0:52:34", "remaining_time": "0:03:02", "throughput": 5587.24, "total_tokens": 17627280} +{"current_steps": 35815, "total_steps": 37885, "loss": 0.0, "lr": 1.8151179955442463e-08, "epoch": 4.726804804012142, "percentage": 94.54, "elapsed_time": "0:52:35", "remaining_time": "0:03:02", "throughput": 5587.52, "total_tokens": 17630032} +{"current_steps": 35820, "total_steps": 37885, "loss": 0.0337, "lr": 1.806390574380079e-08, "epoch": 4.727464695789891, "percentage": 94.55, "elapsed_time": "0:52:35", "remaining_time": "0:03:01", "throughput": 5587.78, "total_tokens": 17632720} +{"current_steps": 35825, "total_steps": 37885, "loss": 0.0, "lr": 1.797683994098431e-08, "epoch": 4.728124587567639, "percentage": 94.56, "elapsed_time": "0:52:35", "remaining_time": "0:03:01", "throughput": 5588.01, "total_tokens": 17635280} +{"current_steps": 35830, "total_steps": 37885, "loss": 0.0, "lr": 1.7889982565472473e-08, "epoch": 4.7287844793453875, "percentage": 94.58, "elapsed_time": "0:52:36", "remaining_time": "0:03:01", "throughput": 5588.24, "total_tokens": 17637840} +{"current_steps": 35835, "total_steps": 37885, "loss": 0.0001, "lr": 1.780333363569986e-08, "epoch": 4.729444371123136, "percentage": 94.59, "elapsed_time": "0:52:36", "remaining_time": "0:03:00", "throughput": 5588.41, "total_tokens": 17640208} +{"current_steps": 35840, "total_steps": 37885, "loss": 0.0, "lr": 1.77168931700572e-08, "epoch": 4.730104262900884, "percentage": 94.6, "elapsed_time": "0:52:36", "remaining_time": "0:03:00", "throughput": 5588.63, "total_tokens": 17642768} +{"current_steps": 35845, "total_steps": 37885, "loss": 0.0647, "lr": 1.7630661186890827e-08, "epoch": 4.730764154678632, "percentage": 94.62, "elapsed_time": "0:52:37", "remaining_time": "0:02:59", "throughput": 5588.79, "total_tokens": 17645136} +{"current_steps": 35850, "total_steps": 37885, "loss": 0.0, "lr": 1.7544637704502875e-08, "epoch": 4.7314240464563815, "percentage": 94.63, "elapsed_time": "0:52:37", "remaining_time": "0:02:59", "throughput": 5588.97, "total_tokens": 17647504} +{"current_steps": 35855, "total_steps": 37885, "loss": 0.0, "lr": 1.745882274115118e-08, "epoch": 4.73208393823413, "percentage": 94.64, "elapsed_time": "0:52:37", "remaining_time": "0:02:58", "throughput": 5589.12, "total_tokens": 17649808} +{"current_steps": 35860, "total_steps": 37885, "loss": 0.0, "lr": 1.7373216315049288e-08, "epoch": 4.732743830011878, "percentage": 94.65, "elapsed_time": "0:52:38", "remaining_time": "0:02:58", "throughput": 5589.42, "total_tokens": 17652624} +{"current_steps": 35865, "total_steps": 37885, "loss": 0.0, "lr": 1.7287818444366663e-08, "epoch": 4.733403721789626, "percentage": 94.67, "elapsed_time": "0:52:38", "remaining_time": "0:02:57", "throughput": 5589.66, "total_tokens": 17655248} +{"current_steps": 35870, "total_steps": 37885, "loss": 0.0032, "lr": 1.7202629147228365e-08, "epoch": 4.734063613567375, "percentage": 94.68, "elapsed_time": "0:52:38", "remaining_time": "0:02:57", "throughput": 5589.87, "total_tokens": 17657744} +{"current_steps": 35875, "total_steps": 37885, "loss": 0.0266, "lr": 1.711764844171515e-08, "epoch": 4.734723505345124, "percentage": 94.69, "elapsed_time": "0:52:39", "remaining_time": "0:02:57", "throughput": 5590.13, "total_tokens": 17660432} +{"current_steps": 35880, "total_steps": 37885, "loss": 0.0023, "lr": 1.7032876345863588e-08, "epoch": 4.735383397122872, "percentage": 94.71, "elapsed_time": "0:52:39", "remaining_time": "0:02:56", "throughput": 5590.27, "total_tokens": 17662736} +{"current_steps": 35885, "total_steps": 37885, "loss": 0.0, "lr": 1.694831287766596e-08, "epoch": 4.73604328890062, "percentage": 94.72, "elapsed_time": "0:52:39", "remaining_time": "0:02:56", "throughput": 5590.43, "total_tokens": 17665040} +{"current_steps": 35890, "total_steps": 37885, "loss": 0.0149, "lr": 1.6863958055070126e-08, "epoch": 4.736703180678369, "percentage": 94.73, "elapsed_time": "0:52:40", "remaining_time": "0:02:55", "throughput": 5590.5, "total_tokens": 17667088} +{"current_steps": 35895, "total_steps": 37885, "loss": 0.0, "lr": 1.677981189597988e-08, "epoch": 4.737363072456117, "percentage": 94.75, "elapsed_time": "0:52:40", "remaining_time": "0:02:55", "throughput": 5590.67, "total_tokens": 17669456} +{"current_steps": 35900, "total_steps": 37885, "loss": 0.0, "lr": 1.6695874418254707e-08, "epoch": 4.738022964233865, "percentage": 94.76, "elapsed_time": "0:52:40", "remaining_time": "0:02:54", "throughput": 5590.82, "total_tokens": 17671760} +{"current_steps": 35905, "total_steps": 37885, "loss": 0.0, "lr": 1.6612145639709696e-08, "epoch": 4.738682856011614, "percentage": 94.77, "elapsed_time": "0:52:41", "remaining_time": "0:02:54", "throughput": 5591.04, "total_tokens": 17674320} +{"current_steps": 35910, "total_steps": 37885, "loss": 0.0239, "lr": 1.652862557811563e-08, "epoch": 4.739342747789363, "percentage": 94.79, "elapsed_time": "0:52:41", "remaining_time": "0:02:53", "throughput": 5591.21, "total_tokens": 17676688} +{"current_steps": 35915, "total_steps": 37885, "loss": 0.0, "lr": 1.6445314251198884e-08, "epoch": 4.740002639567111, "percentage": 94.8, "elapsed_time": "0:52:41", "remaining_time": "0:02:53", "throughput": 5591.3, "total_tokens": 17678800} +{"current_steps": 35920, "total_steps": 37885, "loss": 0.0001, "lr": 1.636221167664209e-08, "epoch": 4.740662531344859, "percentage": 94.81, "elapsed_time": "0:52:42", "remaining_time": "0:02:52", "throughput": 5591.5, "total_tokens": 17681296} +{"current_steps": 35925, "total_steps": 37885, "loss": 0.0, "lr": 1.6279317872082697e-08, "epoch": 4.741322423122607, "percentage": 94.83, "elapsed_time": "0:52:42", "remaining_time": "0:02:52", "throughput": 5591.69, "total_tokens": 17683728} +{"current_steps": 35930, "total_steps": 37885, "loss": 0.0, "lr": 1.6196632855114745e-08, "epoch": 4.741982314900357, "percentage": 94.84, "elapsed_time": "0:52:42", "remaining_time": "0:02:52", "throughput": 5591.86, "total_tokens": 17686096} +{"current_steps": 35935, "total_steps": 37885, "loss": 0.0001, "lr": 1.611415664328708e-08, "epoch": 4.742642206678105, "percentage": 94.85, "elapsed_time": "0:52:43", "remaining_time": "0:02:51", "throughput": 5592.05, "total_tokens": 17688528} +{"current_steps": 35940, "total_steps": 37885, "loss": 0.0, "lr": 1.6031889254105148e-08, "epoch": 4.743302098455853, "percentage": 94.87, "elapsed_time": "0:52:43", "remaining_time": "0:02:51", "throughput": 5592.27, "total_tokens": 17691088} +{"current_steps": 35945, "total_steps": 37885, "loss": 0.0381, "lr": 1.594983070502942e-08, "epoch": 4.743961990233601, "percentage": 94.88, "elapsed_time": "0:52:43", "remaining_time": "0:02:50", "throughput": 5592.43, "total_tokens": 17693392} +{"current_steps": 35950, "total_steps": 37885, "loss": 0.0, "lr": 1.5867981013475974e-08, "epoch": 4.74462188201135, "percentage": 94.89, "elapsed_time": "0:52:44", "remaining_time": "0:02:50", "throughput": 5592.61, "total_tokens": 17695824} +{"current_steps": 35955, "total_steps": 37885, "loss": 0.0177, "lr": 1.5786340196817127e-08, "epoch": 4.745281773789099, "percentage": 94.91, "elapsed_time": "0:52:44", "remaining_time": "0:02:49", "throughput": 5592.73, "total_tokens": 17698000} +{"current_steps": 35960, "total_steps": 37885, "loss": 0.0, "lr": 1.570490827238047e-08, "epoch": 4.745941665566847, "percentage": 94.92, "elapsed_time": "0:52:44", "remaining_time": "0:02:49", "throughput": 5592.84, "total_tokens": 17700176} +{"current_steps": 35965, "total_steps": 37885, "loss": 0.0001, "lr": 1.562368525744939e-08, "epoch": 4.746601557344595, "percentage": 94.93, "elapsed_time": "0:52:45", "remaining_time": "0:02:48", "throughput": 5593.1, "total_tokens": 17702864} +{"current_steps": 35970, "total_steps": 37885, "loss": 0.0032, "lr": 1.5542671169262667e-08, "epoch": 4.747261449122344, "percentage": 94.95, "elapsed_time": "0:52:45", "remaining_time": "0:02:48", "throughput": 5593.28, "total_tokens": 17705296} +{"current_steps": 35975, "total_steps": 37885, "loss": 0.0, "lr": 1.5461866025015202e-08, "epoch": 4.747921340900092, "percentage": 94.96, "elapsed_time": "0:52:45", "remaining_time": "0:02:48", "throughput": 5593.52, "total_tokens": 17707920} +{"current_steps": 35980, "total_steps": 37885, "loss": 0.0, "lr": 1.5381269841857282e-08, "epoch": 4.748581232677841, "percentage": 94.97, "elapsed_time": "0:52:46", "remaining_time": "0:02:47", "throughput": 5593.71, "total_tokens": 17710352} +{"current_steps": 35985, "total_steps": 37885, "loss": 0.0, "lr": 1.5300882636894662e-08, "epoch": 4.749241124455589, "percentage": 94.98, "elapsed_time": "0:52:46", "remaining_time": "0:02:47", "throughput": 5593.8, "total_tokens": 17712464} +{"current_steps": 35990, "total_steps": 37885, "loss": 0.0426, "lr": 1.5220704427189145e-08, "epoch": 4.749901016233338, "percentage": 95.0, "elapsed_time": "0:52:46", "remaining_time": "0:02:46", "throughput": 5593.96, "total_tokens": 17714832} +{"current_steps": 35995, "total_steps": 37885, "loss": 0.075, "lr": 1.5140735229757893e-08, "epoch": 4.750560908011086, "percentage": 95.01, "elapsed_time": "0:52:47", "remaining_time": "0:02:46", "throughput": 5594.13, "total_tokens": 17717200} +{"current_steps": 36000, "total_steps": 37885, "loss": 0.0, "lr": 1.5060975061573777e-08, "epoch": 4.751220799788834, "percentage": 95.02, "elapsed_time": "0:52:47", "remaining_time": "0:02:45", "throughput": 5594.26, "total_tokens": 17719440} +{"current_steps": 36005, "total_steps": 37885, "loss": 0.0, "lr": 1.4981423939565364e-08, "epoch": 4.751880691566583, "percentage": 95.04, "elapsed_time": "0:52:47", "remaining_time": "0:02:45", "throughput": 5594.41, "total_tokens": 17721744} +{"current_steps": 36005, "total_steps": 37885, "eval_loss": 0.2836270332336426, "epoch": 4.751880691566583, "percentage": 95.04, "elapsed_time": "0:52:55", "remaining_time": "0:02:45", "throughput": 5580.67, "total_tokens": 17721744} +{"current_steps": 36010, "total_steps": 37885, "loss": 0.0, "lr": 1.49020818806167e-08, "epoch": 4.752540583344332, "percentage": 95.05, "elapsed_time": "0:53:33", "remaining_time": "0:02:47", "throughput": 5515.1, "total_tokens": 17724048} +{"current_steps": 36015, "total_steps": 37885, "loss": 0.0, "lr": 1.4822948901567767e-08, "epoch": 4.75320047512208, "percentage": 95.06, "elapsed_time": "0:53:34", "remaining_time": "0:02:46", "throughput": 5515.34, "total_tokens": 17726672} +{"current_steps": 36020, "total_steps": 37885, "loss": 0.006, "lr": 1.474402501921368e-08, "epoch": 4.753860366899828, "percentage": 95.08, "elapsed_time": "0:53:34", "remaining_time": "0:02:46", "throughput": 5515.55, "total_tokens": 17729168} +{"current_steps": 36025, "total_steps": 37885, "loss": 0.0001, "lr": 1.4665310250305708e-08, "epoch": 4.7545202586775765, "percentage": 95.09, "elapsed_time": "0:53:34", "remaining_time": "0:02:45", "throughput": 5515.76, "total_tokens": 17731664} +{"current_steps": 36030, "total_steps": 37885, "loss": 0.001, "lr": 1.4586804611550484e-08, "epoch": 4.755180150455326, "percentage": 95.1, "elapsed_time": "0:53:35", "remaining_time": "0:02:45", "throughput": 5515.98, "total_tokens": 17734224} +{"current_steps": 36035, "total_steps": 37885, "loss": 0.0, "lr": 1.4508508119610019e-08, "epoch": 4.755840042233074, "percentage": 95.12, "elapsed_time": "0:53:35", "remaining_time": "0:02:45", "throughput": 5516.18, "total_tokens": 17736656} +{"current_steps": 36040, "total_steps": 37885, "loss": 0.0, "lr": 1.4430420791102461e-08, "epoch": 4.756499934010822, "percentage": 95.13, "elapsed_time": "0:53:35", "remaining_time": "0:02:44", "throughput": 5516.29, "total_tokens": 17738832} +{"current_steps": 36045, "total_steps": 37885, "loss": 0.0, "lr": 1.4352542642601106e-08, "epoch": 4.7571598257885706, "percentage": 95.14, "elapsed_time": "0:53:36", "remaining_time": "0:02:44", "throughput": 5516.5, "total_tokens": 17741328} +{"current_steps": 36050, "total_steps": 37885, "loss": 0.0001, "lr": 1.427487369063507e-08, "epoch": 4.757819717566319, "percentage": 95.16, "elapsed_time": "0:53:36", "remaining_time": "0:02:43", "throughput": 5516.74, "total_tokens": 17743952} +{"current_steps": 36055, "total_steps": 37885, "loss": 0.0001, "lr": 1.4197413951689052e-08, "epoch": 4.758479609344068, "percentage": 95.17, "elapsed_time": "0:53:36", "remaining_time": "0:02:43", "throughput": 5516.97, "total_tokens": 17746512} +{"current_steps": 36060, "total_steps": 37885, "loss": 0.0, "lr": 1.4120163442203237e-08, "epoch": 4.759139501121816, "percentage": 95.18, "elapsed_time": "0:53:37", "remaining_time": "0:02:42", "throughput": 5517.21, "total_tokens": 17749072} +{"current_steps": 36065, "total_steps": 37885, "loss": 0.0, "lr": 1.404312217857373e-08, "epoch": 4.7597993928995646, "percentage": 95.2, "elapsed_time": "0:53:37", "remaining_time": "0:02:42", "throughput": 5517.56, "total_tokens": 17752080} +{"current_steps": 36070, "total_steps": 37885, "loss": 0.0, "lr": 1.3966290177151674e-08, "epoch": 4.760459284677313, "percentage": 95.21, "elapsed_time": "0:53:37", "remaining_time": "0:02:41", "throughput": 5517.81, "total_tokens": 17754704} +{"current_steps": 36075, "total_steps": 37885, "loss": 0.0, "lr": 1.3889667454244136e-08, "epoch": 4.761119176455061, "percentage": 95.22, "elapsed_time": "0:53:38", "remaining_time": "0:02:41", "throughput": 5518.09, "total_tokens": 17757456} +{"current_steps": 36080, "total_steps": 37885, "loss": 0.0, "lr": 1.3813254026113997e-08, "epoch": 4.76177906823281, "percentage": 95.24, "elapsed_time": "0:53:38", "remaining_time": "0:02:41", "throughput": 5518.33, "total_tokens": 17760080} +{"current_steps": 36085, "total_steps": 37885, "loss": 0.0007, "lr": 1.373704990897917e-08, "epoch": 4.7624389600105586, "percentage": 95.25, "elapsed_time": "0:53:38", "remaining_time": "0:02:40", "throughput": 5518.54, "total_tokens": 17762576} +{"current_steps": 36090, "total_steps": 37885, "loss": 0.0, "lr": 1.3661055119013608e-08, "epoch": 4.763098851788307, "percentage": 95.26, "elapsed_time": "0:53:39", "remaining_time": "0:02:40", "throughput": 5518.71, "total_tokens": 17764944} +{"current_steps": 36095, "total_steps": 37885, "loss": 0.0, "lr": 1.3585269672346633e-08, "epoch": 4.763758743566055, "percentage": 95.28, "elapsed_time": "0:53:39", "remaining_time": "0:02:39", "throughput": 5518.92, "total_tokens": 17767440} +{"current_steps": 36100, "total_steps": 37885, "loss": 0.0, "lr": 1.3509693585063042e-08, "epoch": 4.764418635343803, "percentage": 95.29, "elapsed_time": "0:53:39", "remaining_time": "0:02:39", "throughput": 5519.11, "total_tokens": 17769872} +{"current_steps": 36105, "total_steps": 37885, "loss": 0.0, "lr": 1.3434326873203449e-08, "epoch": 4.7650785271215526, "percentage": 95.3, "elapsed_time": "0:53:40", "remaining_time": "0:02:38", "throughput": 5519.28, "total_tokens": 17772240} +{"current_steps": 36110, "total_steps": 37885, "loss": 0.0, "lr": 1.3359169552763727e-08, "epoch": 4.765738418899301, "percentage": 95.31, "elapsed_time": "0:53:40", "remaining_time": "0:02:38", "throughput": 5519.45, "total_tokens": 17774608} +{"current_steps": 36115, "total_steps": 37885, "loss": 0.0, "lr": 1.328422163969567e-08, "epoch": 4.766398310677049, "percentage": 95.33, "elapsed_time": "0:53:40", "remaining_time": "0:02:37", "throughput": 5519.62, "total_tokens": 17776976} +{"current_steps": 36120, "total_steps": 37885, "loss": 0.0, "lr": 1.320948314990633e-08, "epoch": 4.767058202454797, "percentage": 95.34, "elapsed_time": "0:53:41", "remaining_time": "0:02:37", "throughput": 5519.77, "total_tokens": 17779280} +{"current_steps": 36125, "total_steps": 37885, "loss": 0.0, "lr": 1.3134954099258466e-08, "epoch": 4.767718094232546, "percentage": 95.35, "elapsed_time": "0:53:41", "remaining_time": "0:02:36", "throughput": 5519.97, "total_tokens": 17781712} +{"current_steps": 36130, "total_steps": 37885, "loss": 0.0, "lr": 1.306063450357009e-08, "epoch": 4.768377986010294, "percentage": 95.37, "elapsed_time": "0:53:41", "remaining_time": "0:02:36", "throughput": 5520.18, "total_tokens": 17784208} +{"current_steps": 36135, "total_steps": 37885, "loss": 0.0, "lr": 1.298652437861536e-08, "epoch": 4.769037877788043, "percentage": 95.38, "elapsed_time": "0:53:42", "remaining_time": "0:02:36", "throughput": 5520.27, "total_tokens": 17786320} +{"current_steps": 36140, "total_steps": 37885, "loss": 0.0, "lr": 1.2912623740123362e-08, "epoch": 4.769697769565791, "percentage": 95.39, "elapsed_time": "0:53:42", "remaining_time": "0:02:35", "throughput": 5520.44, "total_tokens": 17788688} +{"current_steps": 36145, "total_steps": 37885, "loss": 0.0, "lr": 1.2838932603779107e-08, "epoch": 4.77035766134354, "percentage": 95.41, "elapsed_time": "0:53:42", "remaining_time": "0:02:35", "throughput": 5520.68, "total_tokens": 17791312} +{"current_steps": 36150, "total_steps": 37885, "loss": 0.0, "lr": 1.2765450985222859e-08, "epoch": 4.771017553121288, "percentage": 95.42, "elapsed_time": "0:53:42", "remaining_time": "0:02:34", "throughput": 5520.91, "total_tokens": 17793872} +{"current_steps": 36155, "total_steps": 37885, "loss": 0.0, "lr": 1.269217890005081e-08, "epoch": 4.771677444899036, "percentage": 95.43, "elapsed_time": "0:53:43", "remaining_time": "0:02:34", "throughput": 5521.13, "total_tokens": 17796432} +{"current_steps": 36160, "total_steps": 37885, "loss": 0.0, "lr": 1.2619116363814075e-08, "epoch": 4.772337336676785, "percentage": 95.45, "elapsed_time": "0:53:43", "remaining_time": "0:02:33", "throughput": 5521.32, "total_tokens": 17798864} +{"current_steps": 36165, "total_steps": 37885, "loss": 0.0, "lr": 1.2546263392019917e-08, "epoch": 4.772997228454534, "percentage": 95.46, "elapsed_time": "0:53:43", "remaining_time": "0:02:33", "throughput": 5521.47, "total_tokens": 17801168} +{"current_steps": 36170, "total_steps": 37885, "loss": 0.0, "lr": 1.2473620000130858e-08, "epoch": 4.773657120232282, "percentage": 95.47, "elapsed_time": "0:53:44", "remaining_time": "0:02:32", "throughput": 5521.58, "total_tokens": 17803344} +{"current_steps": 36175, "total_steps": 37885, "loss": 0.0, "lr": 1.2401186203564784e-08, "epoch": 4.77431701201003, "percentage": 95.49, "elapsed_time": "0:53:44", "remaining_time": "0:02:32", "throughput": 5521.79, "total_tokens": 17805840} +{"current_steps": 36180, "total_steps": 37885, "loss": 0.0213, "lr": 1.2328962017695288e-08, "epoch": 4.7749769037877785, "percentage": 95.5, "elapsed_time": "0:53:44", "remaining_time": "0:02:31", "throughput": 5521.99, "total_tokens": 17808336} +{"current_steps": 36185, "total_steps": 37885, "loss": 0.0, "lr": 1.225694745785144e-08, "epoch": 4.775636795565527, "percentage": 95.51, "elapsed_time": "0:53:45", "remaining_time": "0:02:31", "throughput": 5522.22, "total_tokens": 17810960} +{"current_steps": 36190, "total_steps": 37885, "loss": 0.0, "lr": 1.2185142539317905e-08, "epoch": 4.776296687343276, "percentage": 95.53, "elapsed_time": "0:53:45", "remaining_time": "0:02:31", "throughput": 5522.37, "total_tokens": 17813328} +{"current_steps": 36195, "total_steps": 37885, "loss": 0.0, "lr": 1.21135472773346e-08, "epoch": 4.776956579121024, "percentage": 95.54, "elapsed_time": "0:53:45", "remaining_time": "0:02:30", "throughput": 5522.55, "total_tokens": 17815760} +{"current_steps": 36200, "total_steps": 37885, "loss": 0.0, "lr": 1.2042161687097152e-08, "epoch": 4.7776164708987725, "percentage": 95.55, "elapsed_time": "0:53:46", "remaining_time": "0:02:30", "throughput": 5522.78, "total_tokens": 17818384} +{"current_steps": 36205, "total_steps": 37885, "loss": 0.0, "lr": 1.197098578375677e-08, "epoch": 4.778276362676521, "percentage": 95.57, "elapsed_time": "0:53:46", "remaining_time": "0:02:29", "throughput": 5522.94, "total_tokens": 17820752} +{"current_steps": 36210, "total_steps": 37885, "loss": 0.0079, "lr": 1.1900019582419818e-08, "epoch": 4.778936254454269, "percentage": 95.58, "elapsed_time": "0:53:47", "remaining_time": "0:02:29", "throughput": 5523.14, "total_tokens": 17823248} +{"current_steps": 36215, "total_steps": 37885, "loss": 0.0, "lr": 1.1829263098148357e-08, "epoch": 4.779596146232018, "percentage": 95.59, "elapsed_time": "0:53:47", "remaining_time": "0:02:28", "throughput": 5523.32, "total_tokens": 17825680} +{"current_steps": 36220, "total_steps": 37885, "loss": 0.0, "lr": 1.1758716345960263e-08, "epoch": 4.7802560380097665, "percentage": 95.61, "elapsed_time": "0:53:47", "remaining_time": "0:02:28", "throughput": 5523.5, "total_tokens": 17828112} +{"current_steps": 36225, "total_steps": 37885, "loss": 0.0, "lr": 1.1688379340828224e-08, "epoch": 4.780915929787515, "percentage": 95.62, "elapsed_time": "0:53:48", "remaining_time": "0:02:27", "throughput": 5523.69, "total_tokens": 17830544} +{"current_steps": 36230, "total_steps": 37885, "loss": 0.0, "lr": 1.1618252097680858e-08, "epoch": 4.781575821565263, "percentage": 95.63, "elapsed_time": "0:53:48", "remaining_time": "0:02:27", "throughput": 5523.93, "total_tokens": 17833168} +{"current_steps": 36235, "total_steps": 37885, "loss": 0.0, "lr": 1.1548334631402146e-08, "epoch": 4.782235713343011, "percentage": 95.64, "elapsed_time": "0:53:48", "remaining_time": "0:02:27", "throughput": 5524.09, "total_tokens": 17835536} +{"current_steps": 36240, "total_steps": 37885, "loss": 0.0, "lr": 1.1478626956831771e-08, "epoch": 4.7828956051207605, "percentage": 95.66, "elapsed_time": "0:53:49", "remaining_time": "0:02:26", "throughput": 5524.19, "total_tokens": 17837712} +{"current_steps": 36245, "total_steps": 37885, "loss": 0.0, "lr": 1.1409129088764346e-08, "epoch": 4.783555496898509, "percentage": 95.67, "elapsed_time": "0:53:49", "remaining_time": "0:02:26", "throughput": 5524.49, "total_tokens": 17840528} +{"current_steps": 36250, "total_steps": 37885, "loss": 0.0, "lr": 1.1339841041950516e-08, "epoch": 4.784215388676257, "percentage": 95.68, "elapsed_time": "0:53:49", "remaining_time": "0:02:25", "throughput": 5524.65, "total_tokens": 17842896} +{"current_steps": 36255, "total_steps": 37885, "loss": 0.0, "lr": 1.1270762831096182e-08, "epoch": 4.784875280454005, "percentage": 95.7, "elapsed_time": "0:53:50", "remaining_time": "0:02:25", "throughput": 5524.89, "total_tokens": 17845520} +{"current_steps": 36260, "total_steps": 37885, "loss": 0.0, "lr": 1.1201894470862504e-08, "epoch": 4.785535172231754, "percentage": 95.71, "elapsed_time": "0:53:50", "remaining_time": "0:02:24", "throughput": 5525.14, "total_tokens": 17848144} +{"current_steps": 36265, "total_steps": 37885, "loss": 0.0, "lr": 1.1133235975866572e-08, "epoch": 4.786195064009503, "percentage": 95.72, "elapsed_time": "0:53:50", "remaining_time": "0:02:24", "throughput": 5525.24, "total_tokens": 17850320} +{"current_steps": 36270, "total_steps": 37885, "loss": 0.0, "lr": 1.1064787360680282e-08, "epoch": 4.786854955787251, "percentage": 95.74, "elapsed_time": "0:53:51", "remaining_time": "0:02:23", "throughput": 5525.45, "total_tokens": 17852816} +{"current_steps": 36275, "total_steps": 37885, "loss": 0.0, "lr": 1.0996548639831793e-08, "epoch": 4.787514847564999, "percentage": 95.75, "elapsed_time": "0:53:51", "remaining_time": "0:02:23", "throughput": 5525.62, "total_tokens": 17855248} +{"current_steps": 36280, "total_steps": 37885, "loss": 0.0, "lr": 1.0928519827803961e-08, "epoch": 4.788174739342748, "percentage": 95.76, "elapsed_time": "0:53:51", "remaining_time": "0:02:22", "throughput": 5525.78, "total_tokens": 17857616} +{"current_steps": 36285, "total_steps": 37885, "loss": 0.0, "lr": 1.086070093903535e-08, "epoch": 4.788834631120496, "percentage": 95.78, "elapsed_time": "0:53:52", "remaining_time": "0:02:22", "throughput": 5526.02, "total_tokens": 17860240} +{"current_steps": 36290, "total_steps": 37885, "loss": 0.0, "lr": 1.0793091987920444e-08, "epoch": 4.789494522898245, "percentage": 95.79, "elapsed_time": "0:53:52", "remaining_time": "0:02:22", "throughput": 5526.25, "total_tokens": 17862800} +{"current_steps": 36295, "total_steps": 37885, "loss": 0.0, "lr": 1.0725692988808322e-08, "epoch": 4.790154414675993, "percentage": 95.8, "elapsed_time": "0:53:52", "remaining_time": "0:02:21", "throughput": 5526.39, "total_tokens": 17865168} +{"current_steps": 36300, "total_steps": 37885, "loss": 0.0, "lr": 1.0658503956004206e-08, "epoch": 4.790814306453742, "percentage": 95.82, "elapsed_time": "0:53:53", "remaining_time": "0:02:21", "throughput": 5526.59, "total_tokens": 17867664} +{"current_steps": 36305, "total_steps": 37885, "loss": 0.0, "lr": 1.0591524903768245e-08, "epoch": 4.79147419823149, "percentage": 95.83, "elapsed_time": "0:53:53", "remaining_time": "0:02:20", "throughput": 5526.78, "total_tokens": 17870160} +{"current_steps": 36310, "total_steps": 37885, "loss": 0.0, "lr": 1.0524755846316402e-08, "epoch": 4.792134090009238, "percentage": 95.84, "elapsed_time": "0:53:53", "remaining_time": "0:02:20", "throughput": 5526.97, "total_tokens": 17872656} +{"current_steps": 36315, "total_steps": 37885, "loss": 0.0, "lr": 1.0458196797820007e-08, "epoch": 4.792793981786987, "percentage": 95.86, "elapsed_time": "0:53:54", "remaining_time": "0:02:19", "throughput": 5527.21, "total_tokens": 17875280} +{"current_steps": 36320, "total_steps": 37885, "loss": 0.0001, "lr": 1.039184777240565e-08, "epoch": 4.793453873564736, "percentage": 95.87, "elapsed_time": "0:53:54", "remaining_time": "0:02:19", "throughput": 5527.41, "total_tokens": 17877776} +{"current_steps": 36325, "total_steps": 37885, "loss": 0.0, "lr": 1.0325708784155396e-08, "epoch": 4.794113765342484, "percentage": 95.88, "elapsed_time": "0:53:54", "remaining_time": "0:02:18", "throughput": 5527.52, "total_tokens": 17879952} +{"current_steps": 36330, "total_steps": 37885, "loss": 0.0, "lr": 1.0259779847106798e-08, "epoch": 4.794773657120232, "percentage": 95.9, "elapsed_time": "0:53:55", "remaining_time": "0:02:18", "throughput": 5527.75, "total_tokens": 17882512} +{"current_steps": 36335, "total_steps": 37885, "loss": 0.0, "lr": 1.0194060975252772e-08, "epoch": 4.7954335488979805, "percentage": 95.91, "elapsed_time": "0:53:55", "remaining_time": "0:02:18", "throughput": 5527.96, "total_tokens": 17885072} +{"current_steps": 36340, "total_steps": 37885, "loss": 0.0, "lr": 1.0128552182541606e-08, "epoch": 4.79609344067573, "percentage": 95.92, "elapsed_time": "0:53:55", "remaining_time": "0:02:17", "throughput": 5528.16, "total_tokens": 17887568} +{"current_steps": 36345, "total_steps": 37885, "loss": 0.0, "lr": 1.0063253482877287e-08, "epoch": 4.796753332453478, "percentage": 95.94, "elapsed_time": "0:53:56", "remaining_time": "0:02:17", "throughput": 5528.41, "total_tokens": 17890192} +{"current_steps": 36350, "total_steps": 37885, "loss": 0.0005, "lr": 9.998164890118844e-09, "epoch": 4.797413224231226, "percentage": 95.95, "elapsed_time": "0:53:56", "remaining_time": "0:02:16", "throughput": 5528.66, "total_tokens": 17892880} +{"current_steps": 36355, "total_steps": 37885, "loss": 0.0, "lr": 9.933286418080778e-09, "epoch": 4.7980731160089745, "percentage": 95.96, "elapsed_time": "0:53:56", "remaining_time": "0:02:16", "throughput": 5528.83, "total_tokens": 17895376} +{"current_steps": 36360, "total_steps": 37885, "loss": 0.0012, "lr": 9.868618080533298e-09, "epoch": 4.798733007786723, "percentage": 95.97, "elapsed_time": "0:53:57", "remaining_time": "0:02:15", "throughput": 5528.98, "total_tokens": 17897680} +{"current_steps": 36365, "total_steps": 37885, "loss": 0.0011, "lr": 9.804159891201536e-09, "epoch": 4.799392899564472, "percentage": 95.99, "elapsed_time": "0:53:57", "remaining_time": "0:02:15", "throughput": 5529.23, "total_tokens": 17900368} +{"current_steps": 36370, "total_steps": 37885, "loss": 0.0, "lr": 9.739911863766548e-09, "epoch": 4.80005279134222, "percentage": 96.0, "elapsed_time": "0:53:57", "remaining_time": "0:02:14", "throughput": 5529.44, "total_tokens": 17902928} +{"current_steps": 36375, "total_steps": 37885, "loss": 0.0, "lr": 9.675874011864205e-09, "epoch": 4.8007126831199685, "percentage": 96.01, "elapsed_time": "0:53:58", "remaining_time": "0:02:14", "throughput": 5529.65, "total_tokens": 17905488} +{"current_steps": 36380, "total_steps": 37885, "loss": 0.0, "lr": 9.612046349086411e-09, "epoch": 4.801372574897717, "percentage": 96.03, "elapsed_time": "0:53:58", "remaining_time": "0:02:13", "throughput": 5529.82, "total_tokens": 17907920} +{"current_steps": 36385, "total_steps": 37885, "loss": 0.0396, "lr": 9.548428888979775e-09, "epoch": 4.802032466675465, "percentage": 96.04, "elapsed_time": "0:53:58", "remaining_time": "0:02:13", "throughput": 5530.05, "total_tokens": 17910544} +{"current_steps": 36390, "total_steps": 37885, "loss": 0.0054, "lr": 9.485021645046941e-09, "epoch": 4.802692358453213, "percentage": 96.05, "elapsed_time": "0:53:59", "remaining_time": "0:02:13", "throughput": 5530.22, "total_tokens": 17912976} +{"current_steps": 36395, "total_steps": 37885, "loss": 0.0, "lr": 9.421824630745478e-09, "epoch": 4.8033522502309625, "percentage": 96.07, "elapsed_time": "0:53:59", "remaining_time": "0:02:12", "throughput": 5530.37, "total_tokens": 17915280} +{"current_steps": 36400, "total_steps": 37885, "loss": 0.0, "lr": 9.358837859488544e-09, "epoch": 4.804012142008711, "percentage": 96.08, "elapsed_time": "0:53:59", "remaining_time": "0:02:12", "throughput": 5530.52, "total_tokens": 17917648} +{"current_steps": 36405, "total_steps": 37885, "loss": 0.0, "lr": 9.296061344644667e-09, "epoch": 4.804672033786459, "percentage": 96.09, "elapsed_time": "0:54:00", "remaining_time": "0:02:11", "throughput": 5530.65, "total_tokens": 17919952} +{"current_steps": 36410, "total_steps": 37885, "loss": 0.0035, "lr": 9.233495099537525e-09, "epoch": 4.805331925564207, "percentage": 96.11, "elapsed_time": "0:54:00", "remaining_time": "0:02:11", "throughput": 5530.85, "total_tokens": 17922512} +{"current_steps": 36415, "total_steps": 37885, "loss": 0.0005, "lr": 9.171139137446605e-09, "epoch": 4.805991817341956, "percentage": 96.12, "elapsed_time": "0:54:00", "remaining_time": "0:02:10", "throughput": 5531.02, "total_tokens": 17924944} +{"current_steps": 36420, "total_steps": 37885, "loss": 0.0, "lr": 9.10899347160632e-09, "epoch": 4.806651709119705, "percentage": 96.13, "elapsed_time": "0:54:01", "remaining_time": "0:02:10", "throughput": 5531.17, "total_tokens": 17927312} +{"current_steps": 36425, "total_steps": 37885, "loss": 0.0, "lr": 9.047058115206674e-09, "epoch": 4.807311600897453, "percentage": 96.15, "elapsed_time": "0:54:01", "remaining_time": "0:02:09", "throughput": 5531.34, "total_tokens": 17929744} +{"current_steps": 36430, "total_steps": 37885, "loss": 0.028, "lr": 8.985333081393154e-09, "epoch": 4.807971492675201, "percentage": 96.16, "elapsed_time": "0:54:01", "remaining_time": "0:02:09", "throughput": 5531.49, "total_tokens": 17932112} +{"current_steps": 36435, "total_steps": 37885, "loss": 0.0, "lr": 8.923818383266169e-09, "epoch": 4.80863138445295, "percentage": 96.17, "elapsed_time": "0:54:02", "remaining_time": "0:02:09", "throughput": 5531.65, "total_tokens": 17934480} +{"current_steps": 36440, "total_steps": 37885, "loss": 0.0, "lr": 8.862514033882051e-09, "epoch": 4.809291276230698, "percentage": 96.19, "elapsed_time": "0:54:02", "remaining_time": "0:02:08", "throughput": 5531.83, "total_tokens": 17936912} +{"current_steps": 36445, "total_steps": 37885, "loss": 0.0, "lr": 8.80142004625195e-09, "epoch": 4.809951168008446, "percentage": 96.2, "elapsed_time": "0:54:02", "remaining_time": "0:02:08", "throughput": 5532.05, "total_tokens": 17939536} +{"current_steps": 36450, "total_steps": 37885, "loss": 0.0001, "lr": 8.740536433342826e-09, "epoch": 4.810611059786195, "percentage": 96.21, "elapsed_time": "0:54:03", "remaining_time": "0:02:07", "throughput": 5532.14, "total_tokens": 17941712} +{"current_steps": 36455, "total_steps": 37885, "loss": 0.0, "lr": 8.679863208076787e-09, "epoch": 4.811270951563944, "percentage": 96.23, "elapsed_time": "0:54:03", "remaining_time": "0:02:07", "throughput": 5532.28, "total_tokens": 17944016} +{"current_steps": 36460, "total_steps": 37885, "loss": 0.0, "lr": 8.619400383331088e-09, "epoch": 4.811930843341692, "percentage": 96.24, "elapsed_time": "0:54:03", "remaining_time": "0:02:06", "throughput": 5532.42, "total_tokens": 17946320} +{"current_steps": 36465, "total_steps": 37885, "loss": 0.0, "lr": 8.559147971938574e-09, "epoch": 4.81259073511944, "percentage": 96.25, "elapsed_time": "0:54:04", "remaining_time": "0:02:06", "throughput": 5532.6, "total_tokens": 17948752} +{"current_steps": 36470, "total_steps": 37885, "loss": 0.0001, "lr": 8.499105986687572e-09, "epoch": 4.813250626897188, "percentage": 96.27, "elapsed_time": "0:54:04", "remaining_time": "0:02:05", "throughput": 5532.81, "total_tokens": 17951376} +{"current_steps": 36475, "total_steps": 37885, "loss": 0.0, "lr": 8.439274440321442e-09, "epoch": 4.813910518674938, "percentage": 96.28, "elapsed_time": "0:54:04", "remaining_time": "0:02:05", "throughput": 5532.93, "total_tokens": 17953616} +{"current_steps": 36480, "total_steps": 37885, "loss": 0.0, "lr": 8.379653345538918e-09, "epoch": 4.814570410452686, "percentage": 96.29, "elapsed_time": "0:54:05", "remaining_time": "0:02:04", "throughput": 5533.03, "total_tokens": 17955792} +{"current_steps": 36485, "total_steps": 37885, "loss": 0.0, "lr": 8.320242714994319e-09, "epoch": 4.815230302230434, "percentage": 96.3, "elapsed_time": "0:54:05", "remaining_time": "0:02:04", "throughput": 5533.22, "total_tokens": 17958288} +{"current_steps": 36490, "total_steps": 37885, "loss": 0.0001, "lr": 8.261042561297004e-09, "epoch": 4.815890194008182, "percentage": 96.32, "elapsed_time": "0:54:05", "remaining_time": "0:02:04", "throughput": 5533.5, "total_tokens": 17961104} +{"current_steps": 36495, "total_steps": 37885, "loss": 0.0, "lr": 8.202052897011702e-09, "epoch": 4.816550085785931, "percentage": 96.33, "elapsed_time": "0:54:06", "remaining_time": "0:02:03", "throughput": 5533.69, "total_tokens": 17963600} +{"current_steps": 36500, "total_steps": 37885, "loss": 0.0, "lr": 8.143273734658729e-09, "epoch": 4.81720997756368, "percentage": 96.34, "elapsed_time": "0:54:06", "remaining_time": "0:02:03", "throughput": 5533.89, "total_tokens": 17966096} +{"current_steps": 36505, "total_steps": 37885, "loss": 0.0, "lr": 8.084705086713439e-09, "epoch": 4.817869869341428, "percentage": 96.36, "elapsed_time": "0:54:06", "remaining_time": "0:02:02", "throughput": 5534.06, "total_tokens": 17968592} +{"current_steps": 36510, "total_steps": 37885, "loss": 0.0412, "lr": 8.026346965606556e-09, "epoch": 4.818529761119176, "percentage": 96.37, "elapsed_time": "0:54:07", "remaining_time": "0:02:02", "throughput": 5534.19, "total_tokens": 17970832} +{"current_steps": 36515, "total_steps": 37885, "loss": 0.0, "lr": 7.968199383724283e-09, "epoch": 4.819189652896925, "percentage": 96.38, "elapsed_time": "0:54:07", "remaining_time": "0:02:01", "throughput": 5534.33, "total_tokens": 17973136} +{"current_steps": 36520, "total_steps": 37885, "loss": 0.0, "lr": 7.91026235340786e-09, "epoch": 4.819849544674673, "percentage": 96.4, "elapsed_time": "0:54:07", "remaining_time": "0:02:01", "throughput": 5534.53, "total_tokens": 17975632} +{"current_steps": 36525, "total_steps": 37885, "loss": 0.0, "lr": 7.852535886954225e-09, "epoch": 4.820509436452422, "percentage": 96.41, "elapsed_time": "0:54:08", "remaining_time": "0:02:00", "throughput": 5534.72, "total_tokens": 17978128} +{"current_steps": 36530, "total_steps": 37885, "loss": 0.0, "lr": 7.795019996615249e-09, "epoch": 4.82116932823017, "percentage": 96.42, "elapsed_time": "0:54:08", "remaining_time": "0:02:00", "throughput": 5534.95, "total_tokens": 17980752} +{"current_steps": 36535, "total_steps": 37885, "loss": 0.0, "lr": 7.737714694598274e-09, "epoch": 4.821829220007919, "percentage": 96.44, "elapsed_time": "0:54:08", "remaining_time": "0:02:00", "throughput": 5535.21, "total_tokens": 17983504} +{"current_steps": 36540, "total_steps": 37885, "loss": 0.0, "lr": 7.680619993065906e-09, "epoch": 4.822489111785667, "percentage": 96.45, "elapsed_time": "0:54:09", "remaining_time": "0:01:59", "throughput": 5535.36, "total_tokens": 17985872} +{"current_steps": 36545, "total_steps": 37885, "loss": 0.0, "lr": 7.62373590413623e-09, "epoch": 4.823149003563415, "percentage": 96.46, "elapsed_time": "0:54:09", "remaining_time": "0:01:59", "throughput": 5535.6, "total_tokens": 17988560} +{"current_steps": 36550, "total_steps": 37885, "loss": 0.0001, "lr": 7.567062439882254e-09, "epoch": 4.823808895341164, "percentage": 96.48, "elapsed_time": "0:54:09", "remaining_time": "0:01:58", "throughput": 5535.77, "total_tokens": 17990928} +{"current_steps": 36555, "total_steps": 37885, "loss": 0.0025, "lr": 7.510599612332801e-09, "epoch": 4.824468787118913, "percentage": 96.49, "elapsed_time": "0:54:10", "remaining_time": "0:01:58", "throughput": 5535.92, "total_tokens": 17993296} +{"current_steps": 36560, "total_steps": 37885, "loss": 0.0, "lr": 7.454347433471397e-09, "epoch": 4.825128678896661, "percentage": 96.5, "elapsed_time": "0:54:10", "remaining_time": "0:01:57", "throughput": 5536.11, "total_tokens": 17995792} +{"current_steps": 36565, "total_steps": 37885, "loss": 0.028, "lr": 7.398305915237379e-09, "epoch": 4.825788570674409, "percentage": 96.52, "elapsed_time": "0:54:10", "remaining_time": "0:01:57", "throughput": 5536.24, "total_tokens": 17998096} +{"current_steps": 36570, "total_steps": 37885, "loss": 0.0, "lr": 7.342475069525012e-09, "epoch": 4.8264484624521575, "percentage": 96.53, "elapsed_time": "0:54:11", "remaining_time": "0:01:56", "throughput": 5536.41, "total_tokens": 18000528} +{"current_steps": 36575, "total_steps": 37885, "loss": 0.0, "lr": 7.2868549081841476e-09, "epoch": 4.827108354229907, "percentage": 96.54, "elapsed_time": "0:54:11", "remaining_time": "0:01:56", "throughput": 5536.6, "total_tokens": 18003024} +{"current_steps": 36580, "total_steps": 37885, "loss": 0.0, "lr": 7.2314454430195685e-09, "epoch": 4.827768246007655, "percentage": 96.56, "elapsed_time": "0:54:11", "remaining_time": "0:01:56", "throughput": 5536.84, "total_tokens": 18005712} +{"current_steps": 36585, "total_steps": 37885, "loss": 0.0915, "lr": 7.176246685791754e-09, "epoch": 4.828428137785403, "percentage": 96.57, "elapsed_time": "0:54:12", "remaining_time": "0:01:55", "throughput": 5537.0, "total_tokens": 18008144} +{"current_steps": 36590, "total_steps": 37885, "loss": 0.0239, "lr": 7.121258648216e-09, "epoch": 4.8290880295631515, "percentage": 96.58, "elapsed_time": "0:54:12", "remaining_time": "0:01:55", "throughput": 5537.07, "total_tokens": 18010256} +{"current_steps": 36595, "total_steps": 37885, "loss": 0.0, "lr": 7.066481341963304e-09, "epoch": 4.8297479213409, "percentage": 96.59, "elapsed_time": "0:54:13", "remaining_time": "0:01:54", "throughput": 5537.26, "total_tokens": 18012752} +{"current_steps": 36600, "total_steps": 37885, "loss": 0.0, "lr": 7.0119147786597e-09, "epoch": 4.830407813118649, "percentage": 96.61, "elapsed_time": "0:54:13", "remaining_time": "0:01:54", "throughput": 5537.38, "total_tokens": 18014992} +{"current_steps": 36605, "total_steps": 37885, "loss": 0.0004, "lr": 6.957558969886368e-09, "epoch": 4.831067704896397, "percentage": 96.62, "elapsed_time": "0:54:13", "remaining_time": "0:01:53", "throughput": 5537.6, "total_tokens": 18017552} +{"current_steps": 36610, "total_steps": 37885, "loss": 0.0, "lr": 6.9034139271803015e-09, "epoch": 4.8317275966741455, "percentage": 96.63, "elapsed_time": "0:54:14", "remaining_time": "0:01:53", "throughput": 5537.79, "total_tokens": 18020048} +{"current_steps": 36615, "total_steps": 37885, "loss": 0.0002, "lr": 6.849479662033086e-09, "epoch": 4.832387488451894, "percentage": 96.65, "elapsed_time": "0:54:14", "remaining_time": "0:01:52", "throughput": 5537.97, "total_tokens": 18022480} +{"current_steps": 36620, "total_steps": 37885, "loss": 0.0487, "lr": 6.795756185891899e-09, "epoch": 4.833047380229642, "percentage": 96.66, "elapsed_time": "0:54:14", "remaining_time": "0:01:52", "throughput": 5538.12, "total_tokens": 18024848} +{"current_steps": 36625, "total_steps": 37885, "loss": 0.0, "lr": 6.742243510159396e-09, "epoch": 4.833707272007391, "percentage": 96.67, "elapsed_time": "0:54:15", "remaining_time": "0:01:51", "throughput": 5538.26, "total_tokens": 18027152} +{"current_steps": 36630, "total_steps": 37885, "loss": 0.0, "lr": 6.688941646193047e-09, "epoch": 4.8343671637851395, "percentage": 96.69, "elapsed_time": "0:54:15", "remaining_time": "0:01:51", "throughput": 5538.44, "total_tokens": 18029584} +{"current_steps": 36635, "total_steps": 37885, "loss": 0.0, "lr": 6.635850605305804e-09, "epoch": 4.835027055562888, "percentage": 96.7, "elapsed_time": "0:54:15", "remaining_time": "0:01:51", "throughput": 5538.6, "total_tokens": 18031952} +{"current_steps": 36640, "total_steps": 37885, "loss": 0.0, "lr": 6.582970398765986e-09, "epoch": 4.835686947340636, "percentage": 96.71, "elapsed_time": "0:54:16", "remaining_time": "0:01:50", "throughput": 5538.82, "total_tokens": 18034512} +{"current_steps": 36645, "total_steps": 37885, "loss": 0.0, "lr": 6.530301037796837e-09, "epoch": 4.836346839118384, "percentage": 96.73, "elapsed_time": "0:54:16", "remaining_time": "0:01:50", "throughput": 5539.08, "total_tokens": 18037200} +{"current_steps": 36650, "total_steps": 37885, "loss": 0.0, "lr": 6.477842533577194e-09, "epoch": 4.8370067308961335, "percentage": 96.74, "elapsed_time": "0:54:16", "remaining_time": "0:01:49", "throughput": 5539.35, "total_tokens": 18039952} +{"current_steps": 36655, "total_steps": 37885, "loss": 0.0337, "lr": 6.4255948972409265e-09, "epoch": 4.837666622673882, "percentage": 96.75, "elapsed_time": "0:54:17", "remaining_time": "0:01:49", "throughput": 5539.52, "total_tokens": 18042320} +{"current_steps": 36660, "total_steps": 37885, "loss": 0.0, "lr": 6.3735581398772775e-09, "epoch": 4.83832651445163, "percentage": 96.77, "elapsed_time": "0:54:17", "remaining_time": "0:01:48", "throughput": 5539.71, "total_tokens": 18044752} +{"current_steps": 36665, "total_steps": 37885, "loss": 0.0, "lr": 6.321732272530633e-09, "epoch": 4.838986406229378, "percentage": 96.78, "elapsed_time": "0:54:17", "remaining_time": "0:01:48", "throughput": 5539.96, "total_tokens": 18047440} +{"current_steps": 36670, "total_steps": 37885, "loss": 0.0502, "lr": 6.2701173062006396e-09, "epoch": 4.839646298007127, "percentage": 96.79, "elapsed_time": "0:54:18", "remaining_time": "0:01:47", "throughput": 5540.09, "total_tokens": 18049680} +{"current_steps": 36675, "total_steps": 37885, "loss": 0.0381, "lr": 6.2187132518422004e-09, "epoch": 4.840306189784875, "percentage": 96.81, "elapsed_time": "0:54:18", "remaining_time": "0:01:47", "throughput": 5540.3, "total_tokens": 18052176} +{"current_steps": 36680, "total_steps": 37885, "loss": 0.0, "lr": 6.167520120365477e-09, "epoch": 4.840966081562624, "percentage": 96.82, "elapsed_time": "0:54:18", "remaining_time": "0:01:47", "throughput": 5540.6, "total_tokens": 18054992} +{"current_steps": 36685, "total_steps": 37885, "loss": 0.0001, "lr": 6.1165379226358895e-09, "epoch": 4.841625973340372, "percentage": 96.83, "elapsed_time": "0:54:19", "remaining_time": "0:01:46", "throughput": 5540.8, "total_tokens": 18057488} +{"current_steps": 36690, "total_steps": 37885, "loss": 0.0, "lr": 6.065766669474004e-09, "epoch": 4.842285865118121, "percentage": 96.85, "elapsed_time": "0:54:19", "remaining_time": "0:01:46", "throughput": 5541.0, "total_tokens": 18059984} +{"current_steps": 36695, "total_steps": 37885, "loss": 0.0, "lr": 6.015206371655535e-09, "epoch": 4.842945756895869, "percentage": 96.86, "elapsed_time": "0:54:19", "remaining_time": "0:01:45", "throughput": 5541.17, "total_tokens": 18062352} +{"current_steps": 36700, "total_steps": 37885, "loss": 0.0, "lr": 5.964857039911786e-09, "epoch": 4.843605648673617, "percentage": 96.87, "elapsed_time": "0:54:19", "remaining_time": "0:01:45", "throughput": 5541.26, "total_tokens": 18064464} +{"current_steps": 36705, "total_steps": 37885, "loss": 0.001, "lr": 5.914718684928766e-09, "epoch": 4.8442655404513655, "percentage": 96.89, "elapsed_time": "0:54:20", "remaining_time": "0:01:44", "throughput": 5541.45, "total_tokens": 18066896} +{"current_steps": 36710, "total_steps": 37885, "loss": 0.0009, "lr": 5.864791317348183e-09, "epoch": 4.844925432229115, "percentage": 96.9, "elapsed_time": "0:54:20", "remaining_time": "0:01:44", "throughput": 5541.63, "total_tokens": 18069328} +{"current_steps": 36715, "total_steps": 37885, "loss": 0.0, "lr": 5.815074947766674e-09, "epoch": 4.845585324006863, "percentage": 96.91, "elapsed_time": "0:54:20", "remaining_time": "0:01:43", "throughput": 5541.86, "total_tokens": 18071888} +{"current_steps": 36720, "total_steps": 37885, "loss": 0.0, "lr": 5.76556958673613e-09, "epoch": 4.846245215784611, "percentage": 96.92, "elapsed_time": "0:54:21", "remaining_time": "0:01:43", "throughput": 5542.08, "total_tokens": 18074448} +{"current_steps": 36725, "total_steps": 37885, "loss": 0.0, "lr": 5.716275244763813e-09, "epoch": 4.8469051075623595, "percentage": 96.94, "elapsed_time": "0:54:21", "remaining_time": "0:01:43", "throughput": 5542.3, "total_tokens": 18077008} +{"current_steps": 36730, "total_steps": 37885, "loss": 0.0, "lr": 5.667191932312021e-09, "epoch": 4.847564999340108, "percentage": 96.95, "elapsed_time": "0:54:21", "remaining_time": "0:01:42", "throughput": 5542.62, "total_tokens": 18079952} +{"current_steps": 36735, "total_steps": 37885, "loss": 0.0, "lr": 5.61831965979831e-09, "epoch": 4.848224891117857, "percentage": 96.96, "elapsed_time": "0:54:22", "remaining_time": "0:01:42", "throughput": 5542.81, "total_tokens": 18082384} +{"current_steps": 36740, "total_steps": 37885, "loss": 0.0, "lr": 5.5696584375956036e-09, "epoch": 4.848884782895605, "percentage": 96.98, "elapsed_time": "0:54:22", "remaining_time": "0:01:41", "throughput": 5543.0, "total_tokens": 18084816} +{"current_steps": 36745, "total_steps": 37885, "loss": 0.0, "lr": 5.5212082760316415e-09, "epoch": 4.8495446746733535, "percentage": 96.99, "elapsed_time": "0:54:22", "remaining_time": "0:01:41", "throughput": 5543.14, "total_tokens": 18087120} +{"current_steps": 36750, "total_steps": 37885, "loss": 0.0, "lr": 5.472969185389975e-09, "epoch": 4.850204566451102, "percentage": 97.0, "elapsed_time": "0:54:23", "remaining_time": "0:01:40", "throughput": 5543.38, "total_tokens": 18089744} +{"current_steps": 36755, "total_steps": 37885, "loss": 0.0, "lr": 5.424941175908637e-09, "epoch": 4.85086445822885, "percentage": 97.02, "elapsed_time": "0:54:23", "remaining_time": "0:01:40", "throughput": 5543.62, "total_tokens": 18092368} +{"current_steps": 36760, "total_steps": 37885, "loss": 0.0, "lr": 5.377124257781473e-09, "epoch": 4.851524350006599, "percentage": 97.03, "elapsed_time": "0:54:23", "remaining_time": "0:01:39", "throughput": 5543.84, "total_tokens": 18094928} +{"current_steps": 36765, "total_steps": 37885, "loss": 0.0, "lr": 5.329518441157144e-09, "epoch": 4.8521842417843475, "percentage": 97.04, "elapsed_time": "0:54:24", "remaining_time": "0:01:39", "throughput": 5543.92, "total_tokens": 18097040} +{"current_steps": 36770, "total_steps": 37885, "loss": 0.0003, "lr": 5.282123736139677e-09, "epoch": 4.852844133562096, "percentage": 97.06, "elapsed_time": "0:54:24", "remaining_time": "0:01:38", "throughput": 5544.06, "total_tokens": 18099280} +{"current_steps": 36775, "total_steps": 37885, "loss": 0.0, "lr": 5.234940152788358e-09, "epoch": 4.853504025339844, "percentage": 97.07, "elapsed_time": "0:54:24", "remaining_time": "0:01:38", "throughput": 5544.19, "total_tokens": 18101520} +{"current_steps": 36780, "total_steps": 37885, "loss": 0.0, "lr": 5.187967701117401e-09, "epoch": 4.854163917117592, "percentage": 97.08, "elapsed_time": "0:54:25", "remaining_time": "0:01:38", "throughput": 5544.28, "total_tokens": 18103632} +{"current_steps": 36785, "total_steps": 37885, "loss": 0.0, "lr": 5.141206391096387e-09, "epoch": 4.8548238088953415, "percentage": 97.1, "elapsed_time": "0:54:25", "remaining_time": "0:01:37", "throughput": 5544.45, "total_tokens": 18106000} +{"current_steps": 36790, "total_steps": 37885, "loss": 0.0066, "lr": 5.094656232650263e-09, "epoch": 4.85548370067309, "percentage": 97.11, "elapsed_time": "0:54:25", "remaining_time": "0:01:37", "throughput": 5544.69, "total_tokens": 18108624} +{"current_steps": 36795, "total_steps": 37885, "loss": 0.0, "lr": 5.0483172356586835e-09, "epoch": 4.856143592450838, "percentage": 97.12, "elapsed_time": "0:54:26", "remaining_time": "0:01:36", "throughput": 5544.9, "total_tokens": 18111120} +{"current_steps": 36800, "total_steps": 37885, "loss": 0.0, "lr": 5.002189409956892e-09, "epoch": 4.856803484228586, "percentage": 97.14, "elapsed_time": "0:54:26", "remaining_time": "0:01:36", "throughput": 5545.04, "total_tokens": 18113424} +{"current_steps": 36805, "total_steps": 37885, "loss": 0.0, "lr": 4.956272765335278e-09, "epoch": 4.857463376006335, "percentage": 97.15, "elapsed_time": "0:54:26", "remaining_time": "0:01:35", "throughput": 5545.2, "total_tokens": 18115792} +{"current_steps": 36810, "total_steps": 37885, "loss": 0.0039, "lr": 4.91056731153916e-09, "epoch": 4.858123267784084, "percentage": 97.16, "elapsed_time": "0:54:27", "remaining_time": "0:01:35", "throughput": 5545.45, "total_tokens": 18118416} +{"current_steps": 36815, "total_steps": 37885, "loss": 0.0, "lr": 4.865073058269331e-09, "epoch": 4.858783159561832, "percentage": 97.18, "elapsed_time": "0:54:27", "remaining_time": "0:01:34", "throughput": 5545.52, "total_tokens": 18120464} +{"current_steps": 36820, "total_steps": 37885, "loss": 0.0, "lr": 4.819790015181513e-09, "epoch": 4.85944305133958, "percentage": 97.19, "elapsed_time": "0:54:27", "remaining_time": "0:01:34", "throughput": 5545.72, "total_tokens": 18122960} +{"current_steps": 36825, "total_steps": 37885, "loss": 0.0657, "lr": 4.774718191886684e-09, "epoch": 4.860102943117329, "percentage": 97.2, "elapsed_time": "0:54:28", "remaining_time": "0:01:34", "throughput": 5545.94, "total_tokens": 18125520} +{"current_steps": 36830, "total_steps": 37885, "loss": 0.0, "lr": 4.729857597951081e-09, "epoch": 4.860762834895077, "percentage": 97.22, "elapsed_time": "0:54:28", "remaining_time": "0:01:33", "throughput": 5546.17, "total_tokens": 18128080} +{"current_steps": 36835, "total_steps": 37885, "loss": 0.0, "lr": 4.685208242896088e-09, "epoch": 4.861422726672826, "percentage": 97.23, "elapsed_time": "0:54:28", "remaining_time": "0:01:33", "throughput": 5546.22, "total_tokens": 18130064} +{"current_steps": 36840, "total_steps": 37885, "loss": 0.0, "lr": 4.6407701361981246e-09, "epoch": 4.862082618450574, "percentage": 97.24, "elapsed_time": "0:54:29", "remaining_time": "0:01:32", "throughput": 5546.46, "total_tokens": 18132688} +{"current_steps": 36845, "total_steps": 37885, "loss": 0.001, "lr": 4.5965432872888675e-09, "epoch": 4.862742510228323, "percentage": 97.25, "elapsed_time": "0:54:29", "remaining_time": "0:01:32", "throughput": 5546.69, "total_tokens": 18135248} +{"current_steps": 36850, "total_steps": 37885, "loss": 0.0, "lr": 4.552527705555032e-09, "epoch": 4.863402402006071, "percentage": 97.27, "elapsed_time": "0:54:29", "remaining_time": "0:01:31", "throughput": 5546.91, "total_tokens": 18137808} +{"current_steps": 36855, "total_steps": 37885, "loss": 0.001, "lr": 4.5087234003388094e-09, "epoch": 4.864062293783819, "percentage": 97.28, "elapsed_time": "0:54:30", "remaining_time": "0:01:31", "throughput": 5547.02, "total_tokens": 18139984} +{"current_steps": 36860, "total_steps": 37885, "loss": 0.0, "lr": 4.465130380937321e-09, "epoch": 4.864722185561568, "percentage": 97.29, "elapsed_time": "0:54:30", "remaining_time": "0:01:30", "throughput": 5547.16, "total_tokens": 18142288} +{"current_steps": 36865, "total_steps": 37885, "loss": 0.0, "lr": 4.42174865660283e-09, "epoch": 4.865382077339317, "percentage": 97.31, "elapsed_time": "0:54:30", "remaining_time": "0:01:30", "throughput": 5547.4, "total_tokens": 18144912} +{"current_steps": 36870, "total_steps": 37885, "loss": 0.0, "lr": 4.37857823654264e-09, "epoch": 4.866041969117065, "percentage": 97.32, "elapsed_time": "0:54:31", "remaining_time": "0:01:30", "throughput": 5547.68, "total_tokens": 18147664} +{"current_steps": 36875, "total_steps": 37885, "loss": 0.0061, "lr": 4.335619129919643e-09, "epoch": 4.866701860894813, "percentage": 97.33, "elapsed_time": "0:54:31", "remaining_time": "0:01:29", "throughput": 5547.88, "total_tokens": 18150160} +{"current_steps": 36880, "total_steps": 37885, "loss": 0.0001, "lr": 4.292871345851323e-09, "epoch": 4.867361752672561, "percentage": 97.35, "elapsed_time": "0:54:31", "remaining_time": "0:01:29", "throughput": 5548.08, "total_tokens": 18152656} +{"current_steps": 36885, "total_steps": 37885, "loss": 0.0, "lr": 4.250334893410867e-09, "epoch": 4.868021644450311, "percentage": 97.36, "elapsed_time": "0:54:32", "remaining_time": "0:01:28", "throughput": 5548.25, "total_tokens": 18155024} +{"current_steps": 36890, "total_steps": 37885, "loss": 0.0, "lr": 4.208009781626054e-09, "epoch": 4.868681536228059, "percentage": 97.37, "elapsed_time": "0:54:32", "remaining_time": "0:01:28", "throughput": 5548.45, "total_tokens": 18157584} +{"current_steps": 36895, "total_steps": 37885, "loss": 0.0133, "lr": 4.165896019480253e-09, "epoch": 4.869341428005807, "percentage": 97.39, "elapsed_time": "0:54:32", "remaining_time": "0:01:27", "throughput": 5548.66, "total_tokens": 18160080} +{"current_steps": 36900, "total_steps": 37885, "loss": 0.0, "lr": 4.123993615911759e-09, "epoch": 4.870001319783555, "percentage": 97.4, "elapsed_time": "0:54:33", "remaining_time": "0:01:27", "throughput": 5548.81, "total_tokens": 18162384} +{"current_steps": 36905, "total_steps": 37885, "loss": 0.0, "lr": 4.082302579814012e-09, "epoch": 4.870661211561304, "percentage": 97.41, "elapsed_time": "0:54:33", "remaining_time": "0:01:26", "throughput": 5549.05, "total_tokens": 18165008} +{"current_steps": 36910, "total_steps": 37885, "loss": 0.0054, "lr": 4.040822920035713e-09, "epoch": 4.871321103339053, "percentage": 97.43, "elapsed_time": "0:54:33", "remaining_time": "0:01:26", "throughput": 5549.17, "total_tokens": 18167248} +{"current_steps": 36915, "total_steps": 37885, "loss": 0.0, "lr": 3.999554645380487e-09, "epoch": 4.871980995116801, "percentage": 97.44, "elapsed_time": "0:54:34", "remaining_time": "0:01:26", "throughput": 5549.36, "total_tokens": 18169680} +{"current_steps": 36920, "total_steps": 37885, "loss": 0.0, "lr": 3.958497764607438e-09, "epoch": 4.872640886894549, "percentage": 97.45, "elapsed_time": "0:54:34", "remaining_time": "0:01:25", "throughput": 5549.49, "total_tokens": 18171920} +{"current_steps": 36925, "total_steps": 37885, "loss": 0.0, "lr": 3.917652286430484e-09, "epoch": 4.873300778672298, "percentage": 97.47, "elapsed_time": "0:54:34", "remaining_time": "0:01:25", "throughput": 5549.67, "total_tokens": 18174352} +{"current_steps": 36930, "total_steps": 37885, "loss": 0.0549, "lr": 3.87701821951869e-09, "epoch": 4.873960670450046, "percentage": 97.48, "elapsed_time": "0:54:35", "remaining_time": "0:01:24", "throughput": 5549.87, "total_tokens": 18176848} +{"current_steps": 36935, "total_steps": 37885, "loss": 0.0, "lr": 3.836595572496493e-09, "epoch": 4.874620562227794, "percentage": 97.49, "elapsed_time": "0:54:35", "remaining_time": "0:01:24", "throughput": 5550.05, "total_tokens": 18179280} +{"current_steps": 36940, "total_steps": 37885, "loss": 0.0, "lr": 3.796384353943138e-09, "epoch": 4.875280454005543, "percentage": 97.51, "elapsed_time": "0:54:35", "remaining_time": "0:01:23", "throughput": 5550.25, "total_tokens": 18181776} +{"current_steps": 36945, "total_steps": 37885, "loss": 0.0001, "lr": 3.756384572393357e-09, "epoch": 4.875940345783292, "percentage": 97.52, "elapsed_time": "0:54:36", "remaining_time": "0:01:23", "throughput": 5550.52, "total_tokens": 18184528} +{"current_steps": 36950, "total_steps": 37885, "loss": 0.0, "lr": 3.7165962363366888e-09, "epoch": 4.87660023756104, "percentage": 97.53, "elapsed_time": "0:54:36", "remaining_time": "0:01:22", "throughput": 5550.83, "total_tokens": 18187408} +{"current_steps": 36955, "total_steps": 37885, "loss": 0.0, "lr": 3.677019354217936e-09, "epoch": 4.877260129338788, "percentage": 97.55, "elapsed_time": "0:54:36", "remaining_time": "0:01:22", "throughput": 5551.05, "total_tokens": 18189968} +{"current_steps": 36960, "total_steps": 37885, "loss": 0.0, "lr": 3.637653934437046e-09, "epoch": 4.8779200211165366, "percentage": 97.56, "elapsed_time": "0:54:37", "remaining_time": "0:01:22", "throughput": 5551.24, "total_tokens": 18192400} +{"current_steps": 36965, "total_steps": 37885, "loss": 0.0, "lr": 3.5984999853490017e-09, "epoch": 4.878579912894286, "percentage": 97.57, "elapsed_time": "0:54:37", "remaining_time": "0:01:21", "throughput": 5551.47, "total_tokens": 18195024} +{"current_steps": 36970, "total_steps": 37885, "loss": 0.0, "lr": 3.5595575152639333e-09, "epoch": 4.879239804672034, "percentage": 97.58, "elapsed_time": "0:54:37", "remaining_time": "0:01:21", "throughput": 5551.66, "total_tokens": 18197456} +{"current_steps": 36975, "total_steps": 37885, "loss": 0.0, "lr": 3.5208265324472297e-09, "epoch": 4.879899696449782, "percentage": 97.6, "elapsed_time": "0:54:38", "remaining_time": "0:01:20", "throughput": 5551.95, "total_tokens": 18200272} +{"current_steps": 36980, "total_steps": 37885, "loss": 0.0, "lr": 3.4823070451190926e-09, "epoch": 4.8805595882275306, "percentage": 97.61, "elapsed_time": "0:54:38", "remaining_time": "0:01:20", "throughput": 5552.21, "total_tokens": 18202960} +{"current_steps": 36985, "total_steps": 37885, "loss": 0.0, "lr": 3.443999061455094e-09, "epoch": 4.881219480005279, "percentage": 97.62, "elapsed_time": "0:54:38", "remaining_time": "0:01:19", "throughput": 5552.3, "total_tokens": 18205136} +{"current_steps": 36990, "total_steps": 37885, "loss": 0.0, "lr": 3.4059025895857295e-09, "epoch": 4.881879371783027, "percentage": 97.64, "elapsed_time": "0:54:39", "remaining_time": "0:01:19", "throughput": 5552.42, "total_tokens": 18207376} +{"current_steps": 36995, "total_steps": 37885, "loss": 0.0, "lr": 3.368017637596865e-09, "epoch": 4.882539263560776, "percentage": 97.65, "elapsed_time": "0:54:39", "remaining_time": "0:01:18", "throughput": 5552.66, "total_tokens": 18210000} +{"current_steps": 37000, "total_steps": 37885, "loss": 0.0, "lr": 3.330344213529179e-09, "epoch": 4.8831991553385246, "percentage": 97.66, "elapsed_time": "0:54:39", "remaining_time": "0:01:18", "throughput": 5552.88, "total_tokens": 18212560} +{"current_steps": 37005, "total_steps": 37885, "loss": 0.0, "lr": 3.29288232537861e-09, "epoch": 4.883859047116273, "percentage": 97.68, "elapsed_time": "0:54:40", "remaining_time": "0:01:18", "throughput": 5553.01, "total_tokens": 18214800} +{"current_steps": 37010, "total_steps": 37885, "loss": 0.0, "lr": 3.2556319810961297e-09, "epoch": 4.884518938894021, "percentage": 97.69, "elapsed_time": "0:54:40", "remaining_time": "0:01:17", "throughput": 5553.17, "total_tokens": 18217168} +{"current_steps": 37015, "total_steps": 37885, "loss": 0.0, "lr": 3.21859318858797e-09, "epoch": 4.885178830671769, "percentage": 97.7, "elapsed_time": "0:54:40", "remaining_time": "0:01:17", "throughput": 5553.45, "total_tokens": 18219920} +{"current_steps": 37020, "total_steps": 37885, "loss": 0.0, "lr": 3.1817659557152876e-09, "epoch": 4.885838722449519, "percentage": 97.72, "elapsed_time": "0:54:41", "remaining_time": "0:01:16", "throughput": 5553.62, "total_tokens": 18222352} +{"current_steps": 37025, "total_steps": 37885, "loss": 0.0, "lr": 3.1451502902943848e-09, "epoch": 4.886498614227267, "percentage": 97.73, "elapsed_time": "0:54:41", "remaining_time": "0:01:16", "throughput": 5553.82, "total_tokens": 18224848} +{"current_steps": 37030, "total_steps": 37885, "loss": 0.0782, "lr": 3.1087462000967124e-09, "epoch": 4.887158506005015, "percentage": 97.74, "elapsed_time": "0:54:41", "remaining_time": "0:01:15", "throughput": 5554.01, "total_tokens": 18227280} +{"current_steps": 37035, "total_steps": 37885, "loss": 0.001, "lr": 3.0725536928486452e-09, "epoch": 4.887818397782763, "percentage": 97.76, "elapsed_time": "0:54:42", "remaining_time": "0:01:15", "throughput": 5554.19, "total_tokens": 18229712} +{"current_steps": 37040, "total_steps": 37885, "loss": 0.0, "lr": 3.036572776231927e-09, "epoch": 4.888478289560512, "percentage": 97.77, "elapsed_time": "0:54:42", "remaining_time": "0:01:14", "throughput": 5554.44, "total_tokens": 18232400} +{"current_steps": 37045, "total_steps": 37885, "loss": 0.0213, "lr": 3.0008034578832274e-09, "epoch": 4.889138181338261, "percentage": 97.78, "elapsed_time": "0:54:42", "remaining_time": "0:01:14", "throughput": 5554.64, "total_tokens": 18234896} +{"current_steps": 37050, "total_steps": 37885, "loss": 0.0, "lr": 2.9652457453942515e-09, "epoch": 4.889798073116009, "percentage": 97.8, "elapsed_time": "0:54:43", "remaining_time": "0:01:13", "throughput": 5554.84, "total_tokens": 18237392} +{"current_steps": 37055, "total_steps": 37885, "loss": 0.0, "lr": 2.9298996463119618e-09, "epoch": 4.890457964893757, "percentage": 97.81, "elapsed_time": "0:54:43", "remaining_time": "0:01:13", "throughput": 5555.09, "total_tokens": 18240080} +{"current_steps": 37060, "total_steps": 37885, "loss": 0.0, "lr": 2.894765168138247e-09, "epoch": 4.891117856671506, "percentage": 97.82, "elapsed_time": "0:54:43", "remaining_time": "0:01:13", "throughput": 5555.25, "total_tokens": 18242448} +{"current_steps": 37065, "total_steps": 37885, "loss": 0.0164, "lr": 2.85984231833003e-09, "epoch": 4.891777748449254, "percentage": 97.84, "elapsed_time": "0:54:44", "remaining_time": "0:01:12", "throughput": 5555.44, "total_tokens": 18244880} +{"current_steps": 37070, "total_steps": 37885, "loss": 0.0466, "lr": 2.825131104299716e-09, "epoch": 4.892437640227003, "percentage": 97.85, "elapsed_time": "0:54:44", "remaining_time": "0:01:12", "throughput": 5555.6, "total_tokens": 18247248} +{"current_steps": 37075, "total_steps": 37885, "loss": 0.0, "lr": 2.7906315334143004e-09, "epoch": 4.893097532004751, "percentage": 97.86, "elapsed_time": "0:54:44", "remaining_time": "0:01:11", "throughput": 5555.79, "total_tokens": 18249680} +{"current_steps": 37080, "total_steps": 37885, "loss": 0.0072, "lr": 2.756343612996148e-09, "epoch": 4.8937574237825, "percentage": 97.88, "elapsed_time": "0:54:45", "remaining_time": "0:01:11", "throughput": 5555.92, "total_tokens": 18251920} +{"current_steps": 37085, "total_steps": 37885, "loss": 0.0, "lr": 2.722267350322549e-09, "epoch": 4.894417315560248, "percentage": 97.89, "elapsed_time": "0:54:45", "remaining_time": "0:01:10", "throughput": 5556.18, "total_tokens": 18254608} +{"current_steps": 37090, "total_steps": 37885, "loss": 0.0, "lr": 2.6884027526259403e-09, "epoch": 4.895077207337996, "percentage": 97.9, "elapsed_time": "0:54:45", "remaining_time": "0:01:10", "throughput": 5556.36, "total_tokens": 18257040} +{"current_steps": 37095, "total_steps": 37885, "loss": 0.0, "lr": 2.654749827093905e-09, "epoch": 4.895737099115745, "percentage": 97.91, "elapsed_time": "0:54:46", "remaining_time": "0:01:09", "throughput": 5556.54, "total_tokens": 18259472} +{"current_steps": 37100, "total_steps": 37885, "loss": 0.0352, "lr": 2.6213085808691747e-09, "epoch": 4.896396990893494, "percentage": 97.93, "elapsed_time": "0:54:46", "remaining_time": "0:01:09", "throughput": 5556.71, "total_tokens": 18261840} +{"current_steps": 37105, "total_steps": 37885, "loss": 0.0, "lr": 2.588079021049072e-09, "epoch": 4.897056882671242, "percentage": 97.94, "elapsed_time": "0:54:46", "remaining_time": "0:01:09", "throughput": 5556.94, "total_tokens": 18264464} +{"current_steps": 37110, "total_steps": 37885, "loss": 0.0, "lr": 2.5550611546866217e-09, "epoch": 4.89771677444899, "percentage": 97.95, "elapsed_time": "0:54:47", "remaining_time": "0:01:08", "throughput": 5557.1, "total_tokens": 18266832} +{"current_steps": 37115, "total_steps": 37885, "loss": 0.0, "lr": 2.5222549887893295e-09, "epoch": 4.8983766662267385, "percentage": 97.97, "elapsed_time": "0:54:47", "remaining_time": "0:01:08", "throughput": 5557.28, "total_tokens": 18269264} +{"current_steps": 37120, "total_steps": 37885, "loss": 0.0, "lr": 2.4896605303204034e-09, "epoch": 4.899036558004488, "percentage": 97.98, "elapsed_time": "0:54:47", "remaining_time": "0:01:07", "throughput": 5557.48, "total_tokens": 18271760} +{"current_steps": 37125, "total_steps": 37885, "loss": 0.0, "lr": 2.4572777861976425e-09, "epoch": 4.899696449782236, "percentage": 97.99, "elapsed_time": "0:54:48", "remaining_time": "0:01:07", "throughput": 5557.6, "total_tokens": 18274000} +{"current_steps": 37130, "total_steps": 37885, "loss": 0.0, "lr": 2.425106763293994e-09, "epoch": 4.900356341559984, "percentage": 98.01, "elapsed_time": "0:54:48", "remaining_time": "0:01:06", "throughput": 5557.77, "total_tokens": 18276368} +{"current_steps": 37135, "total_steps": 37885, "loss": 0.0, "lr": 2.393147468437551e-09, "epoch": 4.9010162333377325, "percentage": 98.02, "elapsed_time": "0:54:48", "remaining_time": "0:01:06", "throughput": 5557.95, "total_tokens": 18278800} +{"current_steps": 37140, "total_steps": 37885, "loss": 0.031, "lr": 2.3613999084114434e-09, "epoch": 4.901676125115481, "percentage": 98.03, "elapsed_time": "0:54:49", "remaining_time": "0:01:05", "throughput": 5558.13, "total_tokens": 18281232} +{"current_steps": 37145, "total_steps": 37885, "loss": 0.0, "lr": 2.329864089953837e-09, "epoch": 4.90233601689323, "percentage": 98.05, "elapsed_time": "0:54:49", "remaining_time": "0:01:05", "throughput": 5558.28, "total_tokens": 18283536} +{"current_steps": 37150, "total_steps": 37885, "loss": 0.0352, "lr": 2.298540019758155e-09, "epoch": 4.902995908670978, "percentage": 98.06, "elapsed_time": "0:54:49", "remaining_time": "0:01:05", "throughput": 5558.45, "total_tokens": 18285904} +{"current_steps": 37155, "total_steps": 37885, "loss": 0.0, "lr": 2.2674277044724134e-09, "epoch": 4.9036558004487265, "percentage": 98.07, "elapsed_time": "0:54:50", "remaining_time": "0:01:04", "throughput": 5558.63, "total_tokens": 18288336} +{"current_steps": 37160, "total_steps": 37885, "loss": 0.0001, "lr": 2.236527150700218e-09, "epoch": 4.904315692226475, "percentage": 98.09, "elapsed_time": "0:54:50", "remaining_time": "0:01:04", "throughput": 5558.83, "total_tokens": 18290832} +{"current_steps": 37165, "total_steps": 37885, "loss": 0.0, "lr": 2.205838364999879e-09, "epoch": 4.904975584004223, "percentage": 98.1, "elapsed_time": "0:54:50", "remaining_time": "0:01:03", "throughput": 5559.01, "total_tokens": 18293264} +{"current_steps": 37170, "total_steps": 37885, "loss": 0.0, "lr": 2.1753613538849636e-09, "epoch": 4.905635475781972, "percentage": 98.11, "elapsed_time": "0:54:51", "remaining_time": "0:01:03", "throughput": 5559.18, "total_tokens": 18295632} +{"current_steps": 37175, "total_steps": 37885, "loss": 0.0007, "lr": 2.145096123823853e-09, "epoch": 4.9062953675597205, "percentage": 98.13, "elapsed_time": "0:54:51", "remaining_time": "0:01:02", "throughput": 5559.41, "total_tokens": 18298256} +{"current_steps": 37180, "total_steps": 37885, "loss": 0.0, "lr": 2.1150426812401866e-09, "epoch": 4.906955259337469, "percentage": 98.14, "elapsed_time": "0:54:51", "remaining_time": "0:01:02", "throughput": 5559.49, "total_tokens": 18300304} +{"current_steps": 37185, "total_steps": 37885, "loss": 0.0006, "lr": 2.0852010325125293e-09, "epoch": 4.907615151115217, "percentage": 98.15, "elapsed_time": "0:54:52", "remaining_time": "0:01:01", "throughput": 5559.74, "total_tokens": 18302992} +{"current_steps": 37190, "total_steps": 37885, "loss": 0.0, "lr": 2.0555711839747026e-09, "epoch": 4.908275042892965, "percentage": 98.17, "elapsed_time": "0:54:52", "remaining_time": "0:01:01", "throughput": 5559.92, "total_tokens": 18305424} +{"current_steps": 37195, "total_steps": 37885, "loss": 0.0, "lr": 2.0261531419153433e-09, "epoch": 4.908934934670714, "percentage": 98.18, "elapsed_time": "0:54:52", "remaining_time": "0:01:01", "throughput": 5560.03, "total_tokens": 18307664} +{"current_steps": 37200, "total_steps": 37885, "loss": 0.0004, "lr": 1.9969469125782346e-09, "epoch": 4.909594826448463, "percentage": 98.19, "elapsed_time": "0:54:53", "remaining_time": "0:01:00", "throughput": 5560.25, "total_tokens": 18310288} +{"current_steps": 37205, "total_steps": 37885, "loss": 0.0, "lr": 1.9679525021621955e-09, "epoch": 4.910254718226211, "percentage": 98.21, "elapsed_time": "0:54:53", "remaining_time": "0:01:00", "throughput": 5560.42, "total_tokens": 18312720} +{"current_steps": 37210, "total_steps": 37885, "loss": 0.0, "lr": 1.939169916820971e-09, "epoch": 4.910914610003959, "percentage": 98.22, "elapsed_time": "0:54:53", "remaining_time": "0:00:59", "throughput": 5560.69, "total_tokens": 18315536} +{"current_steps": 37215, "total_steps": 37885, "loss": 0.0, "lr": 1.910599162663673e-09, "epoch": 4.911574501781708, "percentage": 98.23, "elapsed_time": "0:54:54", "remaining_time": "0:00:59", "throughput": 5560.86, "total_tokens": 18317968} +{"current_steps": 37220, "total_steps": 37885, "loss": 0.0001, "lr": 1.8822402457540075e-09, "epoch": 4.912234393559456, "percentage": 98.24, "elapsed_time": "0:54:54", "remaining_time": "0:00:58", "throughput": 5561.07, "total_tokens": 18320528} +{"current_steps": 37225, "total_steps": 37885, "loss": 0.0, "lr": 1.8540931721110487e-09, "epoch": 4.912894285337205, "percentage": 98.26, "elapsed_time": "0:54:54", "remaining_time": "0:00:58", "throughput": 5561.24, "total_tokens": 18322960} +{"current_steps": 37230, "total_steps": 37885, "loss": 0.0001, "lr": 1.8261579477087951e-09, "epoch": 4.913554177114953, "percentage": 98.27, "elapsed_time": "0:54:55", "remaining_time": "0:00:57", "throughput": 5561.48, "total_tokens": 18325584} +{"current_steps": 37235, "total_steps": 37885, "loss": 0.0, "lr": 1.7984345784763932e-09, "epoch": 4.914214068892702, "percentage": 98.28, "elapsed_time": "0:54:55", "remaining_time": "0:00:57", "throughput": 5561.65, "total_tokens": 18328016} +{"current_steps": 37240, "total_steps": 37885, "loss": 0.0, "lr": 1.770923070297803e-09, "epoch": 4.91487396067045, "percentage": 98.3, "elapsed_time": "0:54:55", "remaining_time": "0:00:57", "throughput": 5561.78, "total_tokens": 18330256} +{"current_steps": 37245, "total_steps": 37885, "loss": 0.0533, "lr": 1.743623429012131e-09, "epoch": 4.915533852448198, "percentage": 98.31, "elapsed_time": "0:54:56", "remaining_time": "0:00:56", "throughput": 5562.06, "total_tokens": 18333072} +{"current_steps": 37250, "total_steps": 37885, "loss": 0.0, "lr": 1.7165356604136317e-09, "epoch": 4.9161937442259465, "percentage": 98.32, "elapsed_time": "0:54:56", "remaining_time": "0:00:56", "throughput": 5562.22, "total_tokens": 18335440} +{"current_steps": 37255, "total_steps": 37885, "loss": 0.0, "lr": 1.6896597702514837e-09, "epoch": 4.916853636003696, "percentage": 98.34, "elapsed_time": "0:54:56", "remaining_time": "0:00:55", "throughput": 5562.35, "total_tokens": 18337680} +{"current_steps": 37260, "total_steps": 37885, "loss": 0.0028, "lr": 1.6629957642297908e-09, "epoch": 4.917513527781444, "percentage": 98.35, "elapsed_time": "0:54:57", "remaining_time": "0:00:55", "throughput": 5562.64, "total_tokens": 18340496} +{"current_steps": 37265, "total_steps": 37885, "loss": 0.0, "lr": 1.6365436480079153e-09, "epoch": 4.918173419559192, "percentage": 98.36, "elapsed_time": "0:54:57", "remaining_time": "0:00:54", "throughput": 5562.82, "total_tokens": 18342928} +{"current_steps": 37270, "total_steps": 37885, "loss": 0.0, "lr": 1.6103034272000326e-09, "epoch": 4.9188333113369405, "percentage": 98.38, "elapsed_time": "0:54:57", "remaining_time": "0:00:54", "throughput": 5563.03, "total_tokens": 18345488} +{"current_steps": 37275, "total_steps": 37885, "loss": 0.0, "lr": 1.5842751073753546e-09, "epoch": 4.919493203114689, "percentage": 98.39, "elapsed_time": "0:54:58", "remaining_time": "0:00:53", "throughput": 5563.26, "total_tokens": 18348112} +{"current_steps": 37280, "total_steps": 37885, "loss": 0.0, "lr": 1.5584586940584622e-09, "epoch": 4.920153094892438, "percentage": 98.4, "elapsed_time": "0:54:58", "remaining_time": "0:00:53", "throughput": 5563.41, "total_tokens": 18350416} +{"current_steps": 37285, "total_steps": 37885, "loss": 0.028, "lr": 1.5328541927286387e-09, "epoch": 4.920812986670186, "percentage": 98.42, "elapsed_time": "0:54:58", "remaining_time": "0:00:53", "throughput": 5563.6, "total_tokens": 18352912} +{"current_steps": 37290, "total_steps": 37885, "loss": 0.0, "lr": 1.507461608819982e-09, "epoch": 4.9214728784479345, "percentage": 98.43, "elapsed_time": "0:54:59", "remaining_time": "0:00:52", "throughput": 5563.84, "total_tokens": 18355536} +{"current_steps": 37295, "total_steps": 37885, "loss": 0.0, "lr": 1.4822809477222919e-09, "epoch": 4.922132770225683, "percentage": 98.44, "elapsed_time": "0:54:59", "remaining_time": "0:00:52", "throughput": 5563.99, "total_tokens": 18357840} +{"current_steps": 37300, "total_steps": 37885, "loss": 0.0004, "lr": 1.457312214779627e-09, "epoch": 4.922792662003431, "percentage": 98.46, "elapsed_time": "0:54:59", "remaining_time": "0:00:51", "throughput": 5564.19, "total_tokens": 18360336} +{"current_steps": 37305, "total_steps": 37885, "loss": 0.0, "lr": 1.4325554152916364e-09, "epoch": 4.92345255378118, "percentage": 98.47, "elapsed_time": "0:55:00", "remaining_time": "0:00:51", "throughput": 5564.49, "total_tokens": 18363216} +{"current_steps": 37310, "total_steps": 37885, "loss": 0.0, "lr": 1.408010554512673e-09, "epoch": 4.9241124455589285, "percentage": 98.48, "elapsed_time": "0:55:00", "remaining_time": "0:00:50", "throughput": 5564.76, "total_tokens": 18365968} +{"current_steps": 37315, "total_steps": 37885, "loss": 0.0, "lr": 1.3836776376522364e-09, "epoch": 4.924772337336677, "percentage": 98.5, "elapsed_time": "0:55:00", "remaining_time": "0:00:50", "throughput": 5564.95, "total_tokens": 18368400} +{"current_steps": 37320, "total_steps": 37885, "loss": 0.0, "lr": 1.3595566698748617e-09, "epoch": 4.925432229114425, "percentage": 98.51, "elapsed_time": "0:55:01", "remaining_time": "0:00:49", "throughput": 5565.1, "total_tokens": 18370704} +{"current_steps": 37325, "total_steps": 37885, "loss": 0.0, "lr": 1.3356476562998986e-09, "epoch": 4.926092120892173, "percentage": 98.52, "elapsed_time": "0:55:01", "remaining_time": "0:00:49", "throughput": 5565.32, "total_tokens": 18373264} +{"current_steps": 37330, "total_steps": 37885, "loss": 0.0001, "lr": 1.3119506020020653e-09, "epoch": 4.9267520126699225, "percentage": 98.54, "elapsed_time": "0:55:01", "remaining_time": "0:00:49", "throughput": 5565.48, "total_tokens": 18375632} +{"current_steps": 37335, "total_steps": 37885, "loss": 0.0226, "lr": 1.2884655120107835e-09, "epoch": 4.927411904447671, "percentage": 98.55, "elapsed_time": "0:55:02", "remaining_time": "0:00:48", "throughput": 5565.69, "total_tokens": 18378192} +{"current_steps": 37340, "total_steps": 37885, "loss": 0.0, "lr": 1.26519239131051e-09, "epoch": 4.928071796225419, "percentage": 98.56, "elapsed_time": "0:55:02", "remaining_time": "0:00:48", "throughput": 5565.85, "total_tokens": 18380560} +{"current_steps": 37345, "total_steps": 37885, "loss": 0.0, "lr": 1.2421312448408494e-09, "epoch": 4.928731688003167, "percentage": 98.57, "elapsed_time": "0:55:02", "remaining_time": "0:00:47", "throughput": 5566.1, "total_tokens": 18383248} +{"current_steps": 37350, "total_steps": 37885, "loss": 0.0014, "lr": 1.2192820774965529e-09, "epoch": 4.929391579780916, "percentage": 98.59, "elapsed_time": "0:55:03", "remaining_time": "0:00:47", "throughput": 5566.27, "total_tokens": 18385616} +{"current_steps": 37355, "total_steps": 37885, "loss": 0.0, "lr": 1.1966448941269635e-09, "epoch": 4.930051471558665, "percentage": 98.6, "elapsed_time": "0:55:03", "remaining_time": "0:00:46", "throughput": 5566.36, "total_tokens": 18387728} +{"current_steps": 37360, "total_steps": 37885, "loss": 0.0, "lr": 1.1742196995366827e-09, "epoch": 4.930711363336413, "percentage": 98.61, "elapsed_time": "0:55:03", "remaining_time": "0:00:46", "throughput": 5566.53, "total_tokens": 18390096} +{"current_steps": 37365, "total_steps": 37885, "loss": 0.0, "lr": 1.1520064984853473e-09, "epoch": 4.931371255114161, "percentage": 98.63, "elapsed_time": "0:55:04", "remaining_time": "0:00:45", "throughput": 5566.74, "total_tokens": 18392656} +{"current_steps": 37370, "total_steps": 37885, "loss": 0.0, "lr": 1.1300052956876304e-09, "epoch": 4.93203114689191, "percentage": 98.64, "elapsed_time": "0:55:04", "remaining_time": "0:00:45", "throughput": 5566.93, "total_tokens": 18395088} +{"current_steps": 37375, "total_steps": 37885, "loss": 0.0, "lr": 1.1082160958129082e-09, "epoch": 4.932691038669658, "percentage": 98.65, "elapsed_time": "0:55:04", "remaining_time": "0:00:45", "throughput": 5567.09, "total_tokens": 18397456} +{"current_steps": 37380, "total_steps": 37885, "loss": 0.0, "lr": 1.0866389034860368e-09, "epoch": 4.933350930447407, "percentage": 98.67, "elapsed_time": "0:55:05", "remaining_time": "0:00:44", "throughput": 5567.3, "total_tokens": 18400016} +{"current_steps": 37385, "total_steps": 37885, "loss": 0.0, "lr": 1.0652737232864639e-09, "epoch": 4.934010822225155, "percentage": 98.68, "elapsed_time": "0:55:05", "remaining_time": "0:00:44", "throughput": 5567.53, "total_tokens": 18402640} +{"current_steps": 37390, "total_steps": 37885, "loss": 0.0, "lr": 1.0441205597487845e-09, "epoch": 4.934670714002904, "percentage": 98.69, "elapsed_time": "0:55:05", "remaining_time": "0:00:43", "throughput": 5567.7, "total_tokens": 18405072} +{"current_steps": 37395, "total_steps": 37885, "loss": 0.0001, "lr": 1.0231794173626296e-09, "epoch": 4.935330605780652, "percentage": 98.71, "elapsed_time": "0:55:06", "remaining_time": "0:00:43", "throughput": 5567.85, "total_tokens": 18407440} +{"current_steps": 37400, "total_steps": 37885, "loss": 0.0003, "lr": 1.002450300572666e-09, "epoch": 4.9359904975584, "percentage": 98.72, "elapsed_time": "0:55:06", "remaining_time": "0:00:42", "throughput": 5568.03, "total_tokens": 18409872} +{"current_steps": 37405, "total_steps": 37885, "loss": 0.0, "lr": 9.819332137784853e-10, "epoch": 4.936650389336149, "percentage": 98.73, "elapsed_time": "0:55:06", "remaining_time": "0:00:42", "throughput": 5568.23, "total_tokens": 18412432} +{"current_steps": 37410, "total_steps": 37885, "loss": 0.0016, "lr": 9.616281613347155e-10, "epoch": 4.937310281113898, "percentage": 98.75, "elapsed_time": "0:55:07", "remaining_time": "0:00:41", "throughput": 5568.33, "total_tokens": 18414608} +{"current_steps": 37415, "total_steps": 37885, "loss": 0.0044, "lr": 9.415351475507982e-10, "epoch": 4.937970172891646, "percentage": 98.76, "elapsed_time": "0:55:07", "remaining_time": "0:00:41", "throughput": 5568.61, "total_tokens": 18417424} +{"current_steps": 37420, "total_steps": 37885, "loss": 0.0, "lr": 9.216541766914332e-10, "epoch": 4.938630064669394, "percentage": 98.77, "elapsed_time": "0:55:07", "remaining_time": "0:00:41", "throughput": 5568.73, "total_tokens": 18419664} +{"current_steps": 37425, "total_steps": 37885, "loss": 0.0001, "lr": 9.019852529762451e-10, "epoch": 4.939289956447142, "percentage": 98.79, "elapsed_time": "0:55:08", "remaining_time": "0:00:40", "throughput": 5568.97, "total_tokens": 18422352} +{"current_steps": 37430, "total_steps": 37885, "loss": 0.0, "lr": 8.825283805796724e-10, "epoch": 4.939949848224892, "percentage": 98.8, "elapsed_time": "0:55:08", "remaining_time": "0:00:40", "throughput": 5569.19, "total_tokens": 18424912} +{"current_steps": 37435, "total_steps": 37885, "loss": 0.0, "lr": 8.632835636315227e-10, "epoch": 4.94060974000264, "percentage": 98.81, "elapsed_time": "0:55:08", "remaining_time": "0:00:39", "throughput": 5569.43, "total_tokens": 18427600} +{"current_steps": 37440, "total_steps": 37885, "loss": 0.0, "lr": 8.442508062163068e-10, "epoch": 4.941269631780388, "percentage": 98.83, "elapsed_time": "0:55:09", "remaining_time": "0:00:39", "throughput": 5569.61, "total_tokens": 18430032} +{"current_steps": 37445, "total_steps": 37885, "loss": 0.0, "lr": 8.254301123734597e-10, "epoch": 4.941929523558136, "percentage": 98.84, "elapsed_time": "0:55:09", "remaining_time": "0:00:38", "throughput": 5569.77, "total_tokens": 18432400} +{"current_steps": 37450, "total_steps": 37885, "loss": 0.0322, "lr": 8.068214860976752e-10, "epoch": 4.942589415335885, "percentage": 98.85, "elapsed_time": "0:55:09", "remaining_time": "0:00:38", "throughput": 5569.93, "total_tokens": 18434768} +{"current_steps": 37455, "total_steps": 37885, "loss": 0.0861, "lr": 7.884249313383495e-10, "epoch": 4.943249307113634, "percentage": 98.86, "elapsed_time": "0:55:10", "remaining_time": "0:00:38", "throughput": 5570.03, "total_tokens": 18436944} +{"current_steps": 37460, "total_steps": 37885, "loss": 0.0308, "lr": 7.702404520002481e-10, "epoch": 4.943909198891382, "percentage": 98.88, "elapsed_time": "0:55:10", "remaining_time": "0:00:37", "throughput": 5570.16, "total_tokens": 18439248} +{"current_steps": 37465, "total_steps": 37885, "loss": 0.0001, "lr": 7.522680519426173e-10, "epoch": 4.94456909066913, "percentage": 98.89, "elapsed_time": "0:55:10", "remaining_time": "0:00:37", "throughput": 5570.34, "total_tokens": 18441680} +{"current_steps": 37470, "total_steps": 37885, "loss": 0.0, "lr": 7.345077349801832e-10, "epoch": 4.945228982446879, "percentage": 98.9, "elapsed_time": "0:55:11", "remaining_time": "0:00:36", "throughput": 5570.43, "total_tokens": 18443856} +{"current_steps": 37475, "total_steps": 37885, "loss": 0.0, "lr": 7.169595048823751e-10, "epoch": 4.945888874224627, "percentage": 98.92, "elapsed_time": "0:55:11", "remaining_time": "0:00:36", "throughput": 5570.61, "total_tokens": 18446288} +{"current_steps": 37480, "total_steps": 37885, "loss": 0.0496, "lr": 6.996233653736583e-10, "epoch": 4.946548766002375, "percentage": 98.93, "elapsed_time": "0:55:11", "remaining_time": "0:00:35", "throughput": 5570.7, "total_tokens": 18448464} +{"current_steps": 37485, "total_steps": 37885, "loss": 0.0, "lr": 6.824993201334228e-10, "epoch": 4.947208657780124, "percentage": 98.94, "elapsed_time": "0:55:12", "remaining_time": "0:00:35", "throughput": 5570.84, "total_tokens": 18450768} +{"current_steps": 37490, "total_steps": 37885, "loss": 0.0, "lr": 6.655873727963168e-10, "epoch": 4.947868549557873, "percentage": 98.96, "elapsed_time": "0:55:12", "remaining_time": "0:00:34", "throughput": 5571.0, "total_tokens": 18453136} +{"current_steps": 37495, "total_steps": 37885, "loss": 0.0, "lr": 6.488875269516914e-10, "epoch": 4.948528441335621, "percentage": 98.97, "elapsed_time": "0:55:12", "remaining_time": "0:00:34", "throughput": 5571.15, "total_tokens": 18455504} +{"current_steps": 37500, "total_steps": 37885, "loss": 0.0, "lr": 6.323997861439334e-10, "epoch": 4.949188333113369, "percentage": 98.98, "elapsed_time": "0:55:13", "remaining_time": "0:00:34", "throughput": 5571.37, "total_tokens": 18458064} +{"current_steps": 37505, "total_steps": 37885, "loss": 0.0, "lr": 6.16124153872466e-10, "epoch": 4.9498482248911175, "percentage": 99.0, "elapsed_time": "0:55:13", "remaining_time": "0:00:33", "throughput": 5571.59, "total_tokens": 18460624} +{"current_steps": 37510, "total_steps": 37885, "loss": 0.0, "lr": 6.00060633591748e-10, "epoch": 4.950508116668866, "percentage": 99.01, "elapsed_time": "0:55:13", "remaining_time": "0:00:33", "throughput": 5571.79, "total_tokens": 18463120} +{"current_steps": 37515, "total_steps": 37885, "loss": 0.0012, "lr": 5.842092287109412e-10, "epoch": 4.951168008446615, "percentage": 99.02, "elapsed_time": "0:55:14", "remaining_time": "0:00:32", "throughput": 5571.99, "total_tokens": 18465616} +{"current_steps": 37520, "total_steps": 37885, "loss": 0.0, "lr": 5.685699425945767e-10, "epoch": 4.951827900224363, "percentage": 99.04, "elapsed_time": "0:55:14", "remaining_time": "0:00:32", "throughput": 5572.15, "total_tokens": 18467984} +{"current_steps": 37525, "total_steps": 37885, "loss": 0.0, "lr": 5.531427785619991e-10, "epoch": 4.9524877920021115, "percentage": 99.05, "elapsed_time": "0:55:14", "remaining_time": "0:00:31", "throughput": 5572.22, "total_tokens": 18470032} +{"current_steps": 37530, "total_steps": 37885, "loss": 0.0, "lr": 5.379277398873671e-10, "epoch": 4.95314768377986, "percentage": 99.06, "elapsed_time": "0:55:14", "remaining_time": "0:00:31", "throughput": 5572.36, "total_tokens": 18472336} +{"current_steps": 37535, "total_steps": 37885, "loss": 0.0022, "lr": 5.229248298000976e-10, "epoch": 4.953807575557608, "percentage": 99.08, "elapsed_time": "0:55:15", "remaining_time": "0:00:30", "throughput": 5572.64, "total_tokens": 18475088} +{"current_steps": 37540, "total_steps": 37885, "loss": 0.0252, "lr": 5.081340514843102e-10, "epoch": 4.954467467335357, "percentage": 99.09, "elapsed_time": "0:55:15", "remaining_time": "0:00:30", "throughput": 5572.88, "total_tokens": 18477776} +{"current_steps": 37545, "total_steps": 37885, "loss": 0.061, "lr": 4.935554080793825e-10, "epoch": 4.9551273591131055, "percentage": 99.1, "elapsed_time": "0:55:15", "remaining_time": "0:00:30", "throughput": 5573.1, "total_tokens": 18480336} +{"current_steps": 37550, "total_steps": 37885, "loss": 0.0, "lr": 4.791889026793949e-10, "epoch": 4.955787250890854, "percentage": 99.12, "elapsed_time": "0:55:16", "remaining_time": "0:00:29", "throughput": 5573.22, "total_tokens": 18482640} +{"current_steps": 37555, "total_steps": 37885, "loss": 0.0, "lr": 4.6503453833368623e-10, "epoch": 4.956447142668602, "percentage": 99.13, "elapsed_time": "0:55:16", "remaining_time": "0:00:29", "throughput": 5573.35, "total_tokens": 18484880} +{"current_steps": 37560, "total_steps": 37885, "loss": 0.0, "lr": 4.5109231804629776e-10, "epoch": 4.95710703444635, "percentage": 99.14, "elapsed_time": "0:55:16", "remaining_time": "0:00:28", "throughput": 5573.64, "total_tokens": 18487696} +{"current_steps": 37565, "total_steps": 37885, "loss": 0.0473, "lr": 4.37362244776307e-10, "epoch": 4.9577669262240995, "percentage": 99.16, "elapsed_time": "0:55:17", "remaining_time": "0:00:28", "throughput": 5573.89, "total_tokens": 18490384} +{"current_steps": 37570, "total_steps": 37885, "loss": 0.0, "lr": 4.238443214380494e-10, "epoch": 4.958426818001848, "percentage": 99.17, "elapsed_time": "0:55:17", "remaining_time": "0:00:27", "throughput": 5574.06, "total_tokens": 18492816} +{"current_steps": 37575, "total_steps": 37885, "loss": 0.0, "lr": 4.105385509004522e-10, "epoch": 4.959086709779596, "percentage": 99.18, "elapsed_time": "0:55:17", "remaining_time": "0:00:27", "throughput": 5574.28, "total_tokens": 18495376} +{"current_steps": 37580, "total_steps": 37885, "loss": 0.0, "lr": 3.974449359875898e-10, "epoch": 4.959746601557344, "percentage": 99.19, "elapsed_time": "0:55:18", "remaining_time": "0:00:26", "throughput": 5574.4, "total_tokens": 18497616} +{"current_steps": 37585, "total_steps": 37885, "loss": 0.0, "lr": 3.8456347947835034e-10, "epoch": 4.960406493335093, "percentage": 99.21, "elapsed_time": "0:55:18", "remaining_time": "0:00:26", "throughput": 5574.58, "total_tokens": 18500048} +{"current_steps": 37590, "total_steps": 37885, "loss": 0.0, "lr": 3.7189418410699114e-10, "epoch": 4.961066385112842, "percentage": 99.22, "elapsed_time": "0:55:18", "remaining_time": "0:00:26", "throughput": 5574.79, "total_tokens": 18502608} +{"current_steps": 37595, "total_steps": 37885, "loss": 0.0044, "lr": 3.5943705256236136e-10, "epoch": 4.96172627689059, "percentage": 99.23, "elapsed_time": "0:55:19", "remaining_time": "0:00:25", "throughput": 5574.99, "total_tokens": 18505104} +{"current_steps": 37600, "total_steps": 37885, "loss": 0.0, "lr": 3.4719208748834607e-10, "epoch": 4.962386168668338, "percentage": 99.25, "elapsed_time": "0:55:19", "remaining_time": "0:00:25", "throughput": 5575.16, "total_tokens": 18507536} +{"current_steps": 37605, "total_steps": 37885, "loss": 0.0001, "lr": 3.3515929148397737e-10, "epoch": 4.963046060446087, "percentage": 99.26, "elapsed_time": "0:55:19", "remaining_time": "0:00:24", "throughput": 5575.36, "total_tokens": 18510032} +{"current_steps": 37610, "total_steps": 37885, "loss": 0.0, "lr": 3.2333866710299027e-10, "epoch": 4.963705952223835, "percentage": 99.27, "elapsed_time": "0:55:20", "remaining_time": "0:00:24", "throughput": 5575.56, "total_tokens": 18512592} +{"current_steps": 37615, "total_steps": 37885, "loss": 0.0, "lr": 3.1173021685426684e-10, "epoch": 4.964365844001584, "percentage": 99.29, "elapsed_time": "0:55:20", "remaining_time": "0:00:23", "throughput": 5575.65, "total_tokens": 18514704} +{"current_steps": 37620, "total_steps": 37885, "loss": 0.0095, "lr": 3.003339432016139e-10, "epoch": 4.965025735779332, "percentage": 99.3, "elapsed_time": "0:55:20", "remaining_time": "0:00:23", "throughput": 5575.87, "total_tokens": 18517328} +{"current_steps": 37625, "total_steps": 37885, "loss": 0.0006, "lr": 2.891498485638744e-10, "epoch": 4.965685627557081, "percentage": 99.31, "elapsed_time": "0:55:21", "remaining_time": "0:00:22", "throughput": 5576.14, "total_tokens": 18520080} +{"current_steps": 37630, "total_steps": 37885, "loss": 0.0, "lr": 2.781779353147051e-10, "epoch": 4.966345519334829, "percentage": 99.33, "elapsed_time": "0:55:21", "remaining_time": "0:00:22", "throughput": 5576.28, "total_tokens": 18522384} +{"current_steps": 37635, "total_steps": 37885, "loss": 0.0, "lr": 2.6741820578290997e-10, "epoch": 4.967005411112577, "percentage": 99.34, "elapsed_time": "0:55:21", "remaining_time": "0:00:22", "throughput": 5576.61, "total_tokens": 18525392} +{"current_steps": 37640, "total_steps": 37885, "loss": 0.0322, "lr": 2.568706622519956e-10, "epoch": 4.967665302890326, "percentage": 99.35, "elapsed_time": "0:55:22", "remaining_time": "0:00:21", "throughput": 5576.77, "total_tokens": 18527760} +{"current_steps": 37645, "total_steps": 37885, "loss": 0.0002, "lr": 2.465353069608378e-10, "epoch": 4.968325194668075, "percentage": 99.37, "elapsed_time": "0:55:22", "remaining_time": "0:00:21", "throughput": 5576.88, "total_tokens": 18530000} +{"current_steps": 37650, "total_steps": 37885, "loss": 0.0836, "lr": 2.3641214210279314e-10, "epoch": 4.968985086445823, "percentage": 99.38, "elapsed_time": "0:55:22", "remaining_time": "0:00:20", "throughput": 5577.11, "total_tokens": 18532624} +{"current_steps": 37655, "total_steps": 37885, "loss": 0.0, "lr": 2.265011698266983e-10, "epoch": 4.969644978223571, "percentage": 99.39, "elapsed_time": "0:55:23", "remaining_time": "0:00:20", "throughput": 5577.29, "total_tokens": 18535056} +{"current_steps": 37660, "total_steps": 37885, "loss": 0.0, "lr": 2.168023922357598e-10, "epoch": 4.9703048700013195, "percentage": 99.41, "elapsed_time": "0:55:23", "remaining_time": "0:00:19", "throughput": 5577.47, "total_tokens": 18537488} +{"current_steps": 37665, "total_steps": 37885, "loss": 0.0, "lr": 2.0731581138877518e-10, "epoch": 4.970964761779069, "percentage": 99.42, "elapsed_time": "0:55:23", "remaining_time": "0:00:19", "throughput": 5577.62, "total_tokens": 18539792} +{"current_steps": 37670, "total_steps": 37885, "loss": 0.0, "lr": 1.980414292990229e-10, "epoch": 4.971624653556817, "percentage": 99.43, "elapsed_time": "0:55:24", "remaining_time": "0:00:18", "throughput": 5577.8, "total_tokens": 18542224} +{"current_steps": 37675, "total_steps": 37885, "loss": 0.0, "lr": 1.889792479350394e-10, "epoch": 4.972284545334565, "percentage": 99.45, "elapsed_time": "0:55:24", "remaining_time": "0:00:18", "throughput": 5578.05, "total_tokens": 18544912} +{"current_steps": 37680, "total_steps": 37885, "loss": 0.0, "lr": 1.8012926922017502e-10, "epoch": 4.9729444371123135, "percentage": 99.46, "elapsed_time": "0:55:24", "remaining_time": "0:00:18", "throughput": 5578.21, "total_tokens": 18547280} +{"current_steps": 37685, "total_steps": 37885, "loss": 0.0, "lr": 1.714914950327051e-10, "epoch": 4.973604328890062, "percentage": 99.47, "elapsed_time": "0:55:25", "remaining_time": "0:00:17", "throughput": 5578.43, "total_tokens": 18549840} +{"current_steps": 37690, "total_steps": 37885, "loss": 0.0, "lr": 1.6306592720594093e-10, "epoch": 4.974264220667811, "percentage": 99.49, "elapsed_time": "0:55:25", "remaining_time": "0:00:17", "throughput": 5578.59, "total_tokens": 18552208} +{"current_steps": 37695, "total_steps": 37885, "loss": 0.0233, "lr": 1.5485256752822973e-10, "epoch": 4.974924112445559, "percentage": 99.5, "elapsed_time": "0:55:25", "remaining_time": "0:00:16", "throughput": 5578.78, "total_tokens": 18554640} +{"current_steps": 37700, "total_steps": 37885, "loss": 0.0, "lr": 1.4685141774273268e-10, "epoch": 4.9755840042233075, "percentage": 99.51, "elapsed_time": "0:55:26", "remaining_time": "0:00:16", "throughput": 5579.03, "total_tokens": 18557392} +{"current_steps": 37705, "total_steps": 37885, "loss": 0.0, "lr": 1.3906247954764694e-10, "epoch": 4.976243896001056, "percentage": 99.52, "elapsed_time": "0:55:26", "remaining_time": "0:00:15", "throughput": 5579.17, "total_tokens": 18559696} +{"current_steps": 37710, "total_steps": 37885, "loss": 0.0, "lr": 1.3148575459609457e-10, "epoch": 4.976903787778804, "percentage": 99.54, "elapsed_time": "0:55:26", "remaining_time": "0:00:15", "throughput": 5579.3, "total_tokens": 18561936} +{"current_steps": 37715, "total_steps": 37885, "loss": 0.0, "lr": 1.241212444962336e-10, "epoch": 4.977563679556553, "percentage": 99.55, "elapsed_time": "0:55:27", "remaining_time": "0:00:14", "throughput": 5579.5, "total_tokens": 18564432} +{"current_steps": 37720, "total_steps": 37885, "loss": 0.0, "lr": 1.169689508111471e-10, "epoch": 4.9782235713343015, "percentage": 99.56, "elapsed_time": "0:55:27", "remaining_time": "0:00:14", "throughput": 5579.7, "total_tokens": 18566928} +{"current_steps": 37725, "total_steps": 37885, "loss": 0.0, "lr": 1.1002887505873193e-10, "epoch": 4.97888346311205, "percentage": 99.58, "elapsed_time": "0:55:27", "remaining_time": "0:00:14", "throughput": 5579.98, "total_tokens": 18569744} +{"current_steps": 37730, "total_steps": 37885, "loss": 0.0001, "lr": 1.0330101871214303e-10, "epoch": 4.979543354889798, "percentage": 99.59, "elapsed_time": "0:55:28", "remaining_time": "0:00:13", "throughput": 5580.12, "total_tokens": 18572048} +{"current_steps": 37735, "total_steps": 37885, "loss": 0.0, "lr": 9.678538319923824e-11, "epoch": 4.980203246667546, "percentage": 99.6, "elapsed_time": "0:55:28", "remaining_time": "0:00:13", "throughput": 5580.31, "total_tokens": 18574544} +{"current_steps": 37740, "total_steps": 37885, "loss": 0.0005, "lr": 9.048196990280033e-11, "epoch": 4.980863138445295, "percentage": 99.62, "elapsed_time": "0:55:28", "remaining_time": "0:00:12", "throughput": 5580.58, "total_tokens": 18577296} +{"current_steps": 37745, "total_steps": 37885, "loss": 0.0, "lr": 8.439078016087009e-11, "epoch": 4.981523030223044, "percentage": 99.63, "elapsed_time": "0:55:29", "remaining_time": "0:00:12", "throughput": 5580.72, "total_tokens": 18579600} +{"current_steps": 37750, "total_steps": 37885, "loss": 0.0997, "lr": 7.851181526619122e-11, "epoch": 4.982182922000792, "percentage": 99.64, "elapsed_time": "0:55:29", "remaining_time": "0:00:11", "throughput": 5580.95, "total_tokens": 18582224} +{"current_steps": 37755, "total_steps": 37885, "loss": 0.0, "lr": 7.284507646654336e-11, "epoch": 4.98284281377854, "percentage": 99.66, "elapsed_time": "0:55:29", "remaining_time": "0:00:11", "throughput": 5581.03, "total_tokens": 18584336} +{"current_steps": 37760, "total_steps": 37885, "loss": 0.0472, "lr": 6.739056496452011e-11, "epoch": 4.983502705556289, "percentage": 99.67, "elapsed_time": "0:55:30", "remaining_time": "0:00:11", "throughput": 5581.23, "total_tokens": 18586896} +{"current_steps": 37765, "total_steps": 37885, "loss": 0.0001, "lr": 6.214828191797305e-11, "epoch": 4.984162597334037, "percentage": 99.68, "elapsed_time": "0:55:30", "remaining_time": "0:00:10", "throughput": 5581.46, "total_tokens": 18589520} +{"current_steps": 37770, "total_steps": 37885, "loss": 0.0324, "lr": 5.711822843945669e-11, "epoch": 4.984822489111786, "percentage": 99.7, "elapsed_time": "0:55:30", "remaining_time": "0:00:10", "throughput": 5581.64, "total_tokens": 18591952} +{"current_steps": 37775, "total_steps": 37885, "loss": 0.0, "lr": 5.230040559656146e-11, "epoch": 4.985482380889534, "percentage": 99.71, "elapsed_time": "0:55:31", "remaining_time": "0:00:09", "throughput": 5581.81, "total_tokens": 18594384} +{"current_steps": 37780, "total_steps": 37885, "loss": 0.0019, "lr": 4.769481441191381e-11, "epoch": 4.986142272667283, "percentage": 99.72, "elapsed_time": "0:55:31", "remaining_time": "0:00:09", "throughput": 5582.0, "total_tokens": 18596880} +{"current_steps": 37785, "total_steps": 37885, "loss": 0.0038, "lr": 4.330145586284306e-11, "epoch": 4.986802164445031, "percentage": 99.74, "elapsed_time": "0:55:31", "remaining_time": "0:00:08", "throughput": 5582.26, "total_tokens": 18599632} +{"current_steps": 37790, "total_steps": 37885, "loss": 0.0, "lr": 3.912033088204758e-11, "epoch": 4.987462056222779, "percentage": 99.75, "elapsed_time": "0:55:32", "remaining_time": "0:00:08", "throughput": 5582.46, "total_tokens": 18602128} +{"current_steps": 37795, "total_steps": 37885, "loss": 0.0, "lr": 3.515144035670658e-11, "epoch": 4.988121948000527, "percentage": 99.76, "elapsed_time": "0:55:32", "remaining_time": "0:00:07", "throughput": 5582.61, "total_tokens": 18604496} +{"current_steps": 37800, "total_steps": 37885, "loss": 0.0, "lr": 3.139478512936833e-11, "epoch": 4.988781839778277, "percentage": 99.78, "elapsed_time": "0:55:32", "remaining_time": "0:00:07", "throughput": 5582.79, "total_tokens": 18606992} +{"current_steps": 37805, "total_steps": 37885, "loss": 0.0, "lr": 2.7850365997283963e-11, "epoch": 4.989441731556025, "percentage": 99.79, "elapsed_time": "0:55:33", "remaining_time": "0:00:07", "throughput": 5582.91, "total_tokens": 18609232} +{"current_steps": 37810, "total_steps": 37885, "loss": 0.0, "lr": 2.4518183712740615e-11, "epoch": 4.990101623333773, "percentage": 99.8, "elapsed_time": "0:55:33", "remaining_time": "0:00:06", "throughput": 5583.03, "total_tokens": 18611472} +{"current_steps": 37815, "total_steps": 37885, "loss": 0.0, "lr": 2.1398238982839324e-11, "epoch": 4.990761515111521, "percentage": 99.82, "elapsed_time": "0:55:33", "remaining_time": "0:00:06", "throughput": 5583.17, "total_tokens": 18613776} +{"current_steps": 37820, "total_steps": 37885, "loss": 0.0, "lr": 1.8490532470050168e-11, "epoch": 4.99142140688927, "percentage": 99.83, "elapsed_time": "0:55:34", "remaining_time": "0:00:05", "throughput": 5583.37, "total_tokens": 18616272} +{"current_steps": 37825, "total_steps": 37885, "loss": 0.028, "lr": 1.5795064791213065e-11, "epoch": 4.992081298667019, "percentage": 99.84, "elapsed_time": "0:55:34", "remaining_time": "0:00:05", "throughput": 5583.57, "total_tokens": 18618768} +{"current_steps": 37830, "total_steps": 37885, "loss": 0.0, "lr": 1.3311836518647978e-11, "epoch": 4.992741190444767, "percentage": 99.85, "elapsed_time": "0:55:34", "remaining_time": "0:00:04", "throughput": 5583.78, "total_tokens": 18621328} +{"current_steps": 37835, "total_steps": 37885, "loss": 0.0, "lr": 1.104084817926676e-11, "epoch": 4.993401082222515, "percentage": 99.87, "elapsed_time": "0:55:35", "remaining_time": "0:00:04", "throughput": 5583.9, "total_tokens": 18623568} +{"current_steps": 37840, "total_steps": 37885, "loss": 0.0, "lr": 8.982100255128244e-12, "epoch": 4.994060974000264, "percentage": 99.88, "elapsed_time": "0:55:35", "remaining_time": "0:00:03", "throughput": 5584.08, "total_tokens": 18626000} +{"current_steps": 37845, "total_steps": 37885, "loss": 0.0, "lr": 7.135593183216215e-12, "epoch": 4.994720865778012, "percentage": 99.89, "elapsed_time": "0:55:35", "remaining_time": "0:00:03", "throughput": 5584.27, "total_tokens": 18628496} +{"current_steps": 37850, "total_steps": 37885, "loss": 0.0, "lr": 5.501327355328378e-12, "epoch": 4.995380757555761, "percentage": 99.91, "elapsed_time": "0:55:36", "remaining_time": "0:00:03", "throughput": 5584.45, "total_tokens": 18630928} +{"current_steps": 37855, "total_steps": 37885, "loss": 0.0001, "lr": 4.0793031184094275e-12, "epoch": 4.996040649333509, "percentage": 99.92, "elapsed_time": "0:55:36", "remaining_time": "0:00:02", "throughput": 5584.66, "total_tokens": 18633488} +{"current_steps": 37860, "total_steps": 37885, "loss": 0.0, "lr": 2.8695207742179816e-12, "epoch": 4.996700541111258, "percentage": 99.93, "elapsed_time": "0:55:36", "remaining_time": "0:00:02", "throughput": 5584.85, "total_tokens": 18635984} +{"current_steps": 37865, "total_steps": 37885, "loss": 0.0, "lr": 1.8719805796596487e-12, "epoch": 4.997360432889006, "percentage": 99.95, "elapsed_time": "0:55:37", "remaining_time": "0:00:01", "throughput": 5584.99, "total_tokens": 18638288} +{"current_steps": 37870, "total_steps": 37885, "loss": 0.0, "lr": 1.086682746231915e-12, "epoch": 4.998020324666754, "percentage": 99.96, "elapsed_time": "0:55:37", "remaining_time": "0:00:01", "throughput": 5585.17, "total_tokens": 18640720} +{"current_steps": 37875, "total_steps": 37885, "loss": 0.0, "lr": 5.136274408013008e-13, "epoch": 4.998680216444503, "percentage": 99.97, "elapsed_time": "0:55:37", "remaining_time": "0:00:00", "throughput": 5585.29, "total_tokens": 18642960} +{"current_steps": 37880, "total_steps": 37885, "loss": 0.0, "lr": 1.5281478493722745e-13, "epoch": 4.999340108222252, "percentage": 99.99, "elapsed_time": "0:55:38", "remaining_time": "0:00:00", "throughput": 5585.35, "total_tokens": 18645008} +{"current_steps": 37885, "total_steps": 37885, "loss": 0.0001, "lr": 4.244855245083556e-15, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:55:38", "remaining_time": "0:00:00", "throughput": 5585.43, "total_tokens": 18647328} +{"current_steps": 37885, "total_steps": 37885, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:56:14", "remaining_time": "0:00:00", "throughput": 5526.12, "total_tokens": 18647328} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..58b953a --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,60831 @@ +{ + "best_global_step": 5685, + "best_metric": 0.09698151051998138, + "best_model_checkpoint": "saves_bts_preliminary/base/llama-3.2-1b-instruct/train_sst2_42_1779194533/checkpoint-5685", + "epoch": 5.0, + "eval_steps": 1895, + "global_step": 37885, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006598917777484492, + "grad_norm": 442.6911926269531, + "learning_rate": 2.111375032990235e-09, + "loss": 1.4356, + "num_input_tokens_seen": 2240, + "step": 5 + }, + { + "epoch": 0.0013197835554968984, + "grad_norm": 450.9195861816406, + "learning_rate": 4.7505938242280285e-09, + "loss": 1.5327, + "num_input_tokens_seen": 4672, + "step": 10 + }, + { + "epoch": 0.0019796753332453477, + "grad_norm": 461.6313171386719, + "learning_rate": 7.389812615465822e-09, + "loss": 1.4513, + "num_input_tokens_seen": 7040, + "step": 15 + }, + { + "epoch": 0.002639567110993797, + "grad_norm": 494.7020568847656, + "learning_rate": 1.0029031406703616e-08, + "loss": 1.4726, + "num_input_tokens_seen": 9600, + "step": 20 + }, + { + "epoch": 0.0032994588887422464, + "grad_norm": 465.0445251464844, + "learning_rate": 1.2668250197941409e-08, + "loss": 1.4022, + "num_input_tokens_seen": 12160, + "step": 25 + }, + { + "epoch": 0.0039593506664906955, + "grad_norm": 437.6158447265625, + "learning_rate": 1.5307468989179204e-08, + "loss": 1.5128, + "num_input_tokens_seen": 14528, + "step": 30 + }, + { + "epoch": 0.004619242444239145, + "grad_norm": 426.5767517089844, + "learning_rate": 1.7946687780416997e-08, + "loss": 1.3482, + "num_input_tokens_seen": 16768, + "step": 35 + }, + { + "epoch": 0.005279134221987594, + "grad_norm": 475.7981262207031, + "learning_rate": 2.058590657165479e-08, + "loss": 1.4398, + "num_input_tokens_seen": 19264, + "step": 40 + }, + { + "epoch": 0.005939025999736044, + "grad_norm": 467.2344970703125, + "learning_rate": 2.3225125362892583e-08, + "loss": 1.4043, + "num_input_tokens_seen": 21632, + "step": 45 + }, + { + "epoch": 0.006598917777484493, + "grad_norm": 387.37567138671875, + "learning_rate": 2.5864344154130376e-08, + "loss": 1.3473, + "num_input_tokens_seen": 24000, + "step": 50 + }, + { + "epoch": 0.007258809555232942, + "grad_norm": 442.187255859375, + "learning_rate": 2.850356294536817e-08, + "loss": 1.427, + "num_input_tokens_seen": 26496, + "step": 55 + }, + { + "epoch": 0.007918701332981391, + "grad_norm": 441.4548645019531, + "learning_rate": 3.1142781736605966e-08, + "loss": 1.4604, + "num_input_tokens_seen": 29120, + "step": 60 + }, + { + "epoch": 0.008578593110729841, + "grad_norm": 411.3653259277344, + "learning_rate": 3.378200052784376e-08, + "loss": 1.2511, + "num_input_tokens_seen": 31744, + "step": 65 + }, + { + "epoch": 0.00923848488847829, + "grad_norm": 390.2351989746094, + "learning_rate": 3.6421219319081546e-08, + "loss": 1.2233, + "num_input_tokens_seen": 34176, + "step": 70 + }, + { + "epoch": 0.009898376666226739, + "grad_norm": 400.3909912109375, + "learning_rate": 3.9060438110319346e-08, + "loss": 1.2987, + "num_input_tokens_seen": 36864, + "step": 75 + }, + { + "epoch": 0.010558268443975187, + "grad_norm": 388.7707824707031, + "learning_rate": 4.169965690155713e-08, + "loss": 1.1757, + "num_input_tokens_seen": 39424, + "step": 80 + }, + { + "epoch": 0.011218160221723637, + "grad_norm": 396.9801025390625, + "learning_rate": 4.433887569279493e-08, + "loss": 1.1551, + "num_input_tokens_seen": 42112, + "step": 85 + }, + { + "epoch": 0.011878051999472087, + "grad_norm": 278.70367431640625, + "learning_rate": 4.6978094484032725e-08, + "loss": 1.0816, + "num_input_tokens_seen": 44544, + "step": 90 + }, + { + "epoch": 0.012537943777220536, + "grad_norm": 273.1611022949219, + "learning_rate": 4.961731327527052e-08, + "loss": 0.8812, + "num_input_tokens_seen": 47104, + "step": 95 + }, + { + "epoch": 0.013197835554968985, + "grad_norm": 244.677978515625, + "learning_rate": 5.225653206650831e-08, + "loss": 0.8313, + "num_input_tokens_seen": 49664, + "step": 100 + }, + { + "epoch": 0.013857727332717434, + "grad_norm": 235.11949157714844, + "learning_rate": 5.4895750857746105e-08, + "loss": 0.7819, + "num_input_tokens_seen": 52352, + "step": 105 + }, + { + "epoch": 0.014517619110465884, + "grad_norm": 250.9341278076172, + "learning_rate": 5.75349696489839e-08, + "loss": 0.746, + "num_input_tokens_seen": 54720, + "step": 110 + }, + { + "epoch": 0.015177510888214334, + "grad_norm": 217.72210693359375, + "learning_rate": 6.01741884402217e-08, + "loss": 0.7562, + "num_input_tokens_seen": 57152, + "step": 115 + }, + { + "epoch": 0.015837402665962782, + "grad_norm": 203.7813720703125, + "learning_rate": 6.281340723145948e-08, + "loss": 0.6604, + "num_input_tokens_seen": 59776, + "step": 120 + }, + { + "epoch": 0.01649729444371123, + "grad_norm": 61.57086181640625, + "learning_rate": 6.545262602269728e-08, + "loss": 0.4326, + "num_input_tokens_seen": 62464, + "step": 125 + }, + { + "epoch": 0.017157186221459682, + "grad_norm": 43.6219367980957, + "learning_rate": 6.809184481393507e-08, + "loss": 0.3566, + "num_input_tokens_seen": 65088, + "step": 130 + }, + { + "epoch": 0.01781707799920813, + "grad_norm": 30.025165557861328, + "learning_rate": 7.073106360517287e-08, + "loss": 0.3399, + "num_input_tokens_seen": 67776, + "step": 135 + }, + { + "epoch": 0.01847696977695658, + "grad_norm": 34.46451950073242, + "learning_rate": 7.337028239641066e-08, + "loss": 0.3479, + "num_input_tokens_seen": 70400, + "step": 140 + }, + { + "epoch": 0.01913686155470503, + "grad_norm": 75.49683380126953, + "learning_rate": 7.600950118764846e-08, + "loss": 0.3225, + "num_input_tokens_seen": 72704, + "step": 145 + }, + { + "epoch": 0.019796753332453478, + "grad_norm": 35.20500183105469, + "learning_rate": 7.864871997888626e-08, + "loss": 0.3282, + "num_input_tokens_seen": 75136, + "step": 150 + }, + { + "epoch": 0.020456645110201926, + "grad_norm": 26.75568962097168, + "learning_rate": 8.128793877012403e-08, + "loss": 0.3013, + "num_input_tokens_seen": 77632, + "step": 155 + }, + { + "epoch": 0.021116536887950375, + "grad_norm": 19.496416091918945, + "learning_rate": 8.392715756136183e-08, + "loss": 0.3084, + "num_input_tokens_seen": 80320, + "step": 160 + }, + { + "epoch": 0.021776428665698826, + "grad_norm": 66.57316589355469, + "learning_rate": 8.656637635259963e-08, + "loss": 0.3253, + "num_input_tokens_seen": 82752, + "step": 165 + }, + { + "epoch": 0.022436320443447275, + "grad_norm": 51.820770263671875, + "learning_rate": 8.920559514383743e-08, + "loss": 0.2936, + "num_input_tokens_seen": 85248, + "step": 170 + }, + { + "epoch": 0.023096212221195723, + "grad_norm": 64.85391235351562, + "learning_rate": 9.184481393507522e-08, + "loss": 0.3018, + "num_input_tokens_seen": 87872, + "step": 175 + }, + { + "epoch": 0.023756103998944175, + "grad_norm": 29.103076934814453, + "learning_rate": 9.4484032726313e-08, + "loss": 0.2907, + "num_input_tokens_seen": 90368, + "step": 180 + }, + { + "epoch": 0.024415995776692623, + "grad_norm": 62.926822662353516, + "learning_rate": 9.71232515175508e-08, + "loss": 0.2362, + "num_input_tokens_seen": 92928, + "step": 185 + }, + { + "epoch": 0.02507588755444107, + "grad_norm": 40.194679260253906, + "learning_rate": 9.976247030878859e-08, + "loss": 0.2169, + "num_input_tokens_seen": 95296, + "step": 190 + }, + { + "epoch": 0.02573577933218952, + "grad_norm": 55.39727783203125, + "learning_rate": 1.0240168910002639e-07, + "loss": 0.204, + "num_input_tokens_seen": 97984, + "step": 195 + }, + { + "epoch": 0.02639567110993797, + "grad_norm": 92.27530670166016, + "learning_rate": 1.0504090789126419e-07, + "loss": 0.2406, + "num_input_tokens_seen": 100352, + "step": 200 + }, + { + "epoch": 0.02705556288768642, + "grad_norm": 25.1845703125, + "learning_rate": 1.0768012668250196e-07, + "loss": 0.2504, + "num_input_tokens_seen": 102464, + "step": 205 + }, + { + "epoch": 0.027715454665434867, + "grad_norm": 27.743425369262695, + "learning_rate": 1.1031934547373976e-07, + "loss": 0.1825, + "num_input_tokens_seen": 105088, + "step": 210 + }, + { + "epoch": 0.02837534644318332, + "grad_norm": 78.66401672363281, + "learning_rate": 1.1295856426497756e-07, + "loss": 0.203, + "num_input_tokens_seen": 107648, + "step": 215 + }, + { + "epoch": 0.029035238220931767, + "grad_norm": 70.38174438476562, + "learning_rate": 1.1559778305621536e-07, + "loss": 0.1882, + "num_input_tokens_seen": 110144, + "step": 220 + }, + { + "epoch": 0.029695129998680216, + "grad_norm": 83.74911499023438, + "learning_rate": 1.1823700184745315e-07, + "loss": 0.202, + "num_input_tokens_seen": 112576, + "step": 225 + }, + { + "epoch": 0.030355021776428667, + "grad_norm": 83.70970153808594, + "learning_rate": 1.2087622063869096e-07, + "loss": 0.2255, + "num_input_tokens_seen": 115264, + "step": 230 + }, + { + "epoch": 0.031014913554177116, + "grad_norm": 26.91559410095215, + "learning_rate": 1.2351543942992873e-07, + "loss": 0.1494, + "num_input_tokens_seen": 117888, + "step": 235 + }, + { + "epoch": 0.031674805331925564, + "grad_norm": 57.33741760253906, + "learning_rate": 1.2615465822116653e-07, + "loss": 0.0743, + "num_input_tokens_seen": 120320, + "step": 240 + }, + { + "epoch": 0.032334697109674015, + "grad_norm": 154.66212463378906, + "learning_rate": 1.2879387701240433e-07, + "loss": 0.1968, + "num_input_tokens_seen": 122688, + "step": 245 + }, + { + "epoch": 0.03299458888742246, + "grad_norm": 89.07099914550781, + "learning_rate": 1.314330958036421e-07, + "loss": 0.1802, + "num_input_tokens_seen": 125248, + "step": 250 + }, + { + "epoch": 0.03365448066517091, + "grad_norm": 78.18814849853516, + "learning_rate": 1.340723145948799e-07, + "loss": 0.149, + "num_input_tokens_seen": 127680, + "step": 255 + }, + { + "epoch": 0.034314372442919364, + "grad_norm": 48.04738235473633, + "learning_rate": 1.367115333861177e-07, + "loss": 0.0907, + "num_input_tokens_seen": 130496, + "step": 260 + }, + { + "epoch": 0.03497426422066781, + "grad_norm": 81.23184967041016, + "learning_rate": 1.393507521773555e-07, + "loss": 0.1709, + "num_input_tokens_seen": 132992, + "step": 265 + }, + { + "epoch": 0.03563415599841626, + "grad_norm": 57.03975296020508, + "learning_rate": 1.419899709685933e-07, + "loss": 0.1453, + "num_input_tokens_seen": 135040, + "step": 270 + }, + { + "epoch": 0.03629404777616471, + "grad_norm": 125.083251953125, + "learning_rate": 1.4462918975983108e-07, + "loss": 0.1749, + "num_input_tokens_seen": 137600, + "step": 275 + }, + { + "epoch": 0.03695393955391316, + "grad_norm": 95.15032958984375, + "learning_rate": 1.4726840855106888e-07, + "loss": 0.1575, + "num_input_tokens_seen": 139904, + "step": 280 + }, + { + "epoch": 0.03761383133166161, + "grad_norm": 44.138343811035156, + "learning_rate": 1.4990762734230665e-07, + "loss": 0.1957, + "num_input_tokens_seen": 142016, + "step": 285 + }, + { + "epoch": 0.03827372310941006, + "grad_norm": 17.540449142456055, + "learning_rate": 1.5254684613354445e-07, + "loss": 0.0195, + "num_input_tokens_seen": 144576, + "step": 290 + }, + { + "epoch": 0.038933614887158505, + "grad_norm": 85.709228515625, + "learning_rate": 1.5518606492478225e-07, + "loss": 0.086, + "num_input_tokens_seen": 146880, + "step": 295 + }, + { + "epoch": 0.039593506664906956, + "grad_norm": 155.11170959472656, + "learning_rate": 1.5782528371602005e-07, + "loss": 0.2891, + "num_input_tokens_seen": 149184, + "step": 300 + }, + { + "epoch": 0.0402533984426554, + "grad_norm": 5.010693073272705, + "learning_rate": 1.6046450250725785e-07, + "loss": 0.1615, + "num_input_tokens_seen": 151296, + "step": 305 + }, + { + "epoch": 0.04091329022040385, + "grad_norm": 55.62345504760742, + "learning_rate": 1.6310372129849565e-07, + "loss": 0.2055, + "num_input_tokens_seen": 153664, + "step": 310 + }, + { + "epoch": 0.041573181998152305, + "grad_norm": 31.857683181762695, + "learning_rate": 1.6574294008973345e-07, + "loss": 0.2078, + "num_input_tokens_seen": 156032, + "step": 315 + }, + { + "epoch": 0.04223307377590075, + "grad_norm": 7.426979064941406, + "learning_rate": 1.6838215888097122e-07, + "loss": 0.1392, + "num_input_tokens_seen": 158528, + "step": 320 + }, + { + "epoch": 0.0428929655536492, + "grad_norm": 22.063268661499023, + "learning_rate": 1.71021377672209e-07, + "loss": 0.1113, + "num_input_tokens_seen": 160704, + "step": 325 + }, + { + "epoch": 0.04355285733139765, + "grad_norm": 40.419403076171875, + "learning_rate": 1.736605964634468e-07, + "loss": 0.178, + "num_input_tokens_seen": 163072, + "step": 330 + }, + { + "epoch": 0.0442127491091461, + "grad_norm": 111.098876953125, + "learning_rate": 1.762998152546846e-07, + "loss": 0.1961, + "num_input_tokens_seen": 165568, + "step": 335 + }, + { + "epoch": 0.04487264088689455, + "grad_norm": 5.24355936050415, + "learning_rate": 1.789390340459224e-07, + "loss": 0.2253, + "num_input_tokens_seen": 167936, + "step": 340 + }, + { + "epoch": 0.045532532664643, + "grad_norm": 191.69830322265625, + "learning_rate": 1.815782528371602e-07, + "loss": 0.174, + "num_input_tokens_seen": 170176, + "step": 345 + }, + { + "epoch": 0.046192424442391446, + "grad_norm": 140.1460418701172, + "learning_rate": 1.84217471628398e-07, + "loss": 0.1717, + "num_input_tokens_seen": 172352, + "step": 350 + }, + { + "epoch": 0.0468523162201399, + "grad_norm": 72.49575805664062, + "learning_rate": 1.8685669041963577e-07, + "loss": 0.1506, + "num_input_tokens_seen": 175168, + "step": 355 + }, + { + "epoch": 0.04751220799788835, + "grad_norm": 37.27299499511719, + "learning_rate": 1.8949590921087357e-07, + "loss": 0.0852, + "num_input_tokens_seen": 177856, + "step": 360 + }, + { + "epoch": 0.048172099775636794, + "grad_norm": 95.40577697753906, + "learning_rate": 1.9213512800211137e-07, + "loss": 0.2049, + "num_input_tokens_seen": 180160, + "step": 365 + }, + { + "epoch": 0.048831991553385246, + "grad_norm": 67.52294921875, + "learning_rate": 1.9477434679334917e-07, + "loss": 0.1772, + "num_input_tokens_seen": 182784, + "step": 370 + }, + { + "epoch": 0.0494918833311337, + "grad_norm": 12.928277969360352, + "learning_rate": 1.9741356558458697e-07, + "loss": 0.0558, + "num_input_tokens_seen": 185024, + "step": 375 + }, + { + "epoch": 0.05015177510888214, + "grad_norm": 96.16371154785156, + "learning_rate": 2.0005278437582474e-07, + "loss": 0.2301, + "num_input_tokens_seen": 187712, + "step": 380 + }, + { + "epoch": 0.050811666886630594, + "grad_norm": 131.8448028564453, + "learning_rate": 2.0269200316706254e-07, + "loss": 0.179, + "num_input_tokens_seen": 190400, + "step": 385 + }, + { + "epoch": 0.05147155866437904, + "grad_norm": 83.01555633544922, + "learning_rate": 2.0533122195830032e-07, + "loss": 0.1703, + "num_input_tokens_seen": 193280, + "step": 390 + }, + { + "epoch": 0.05213145044212749, + "grad_norm": 68.2806625366211, + "learning_rate": 2.0797044074953812e-07, + "loss": 0.238, + "num_input_tokens_seen": 195584, + "step": 395 + }, + { + "epoch": 0.05279134221987594, + "grad_norm": 8.89856243133545, + "learning_rate": 2.1060965954077591e-07, + "loss": 0.0807, + "num_input_tokens_seen": 198208, + "step": 400 + }, + { + "epoch": 0.05345123399762439, + "grad_norm": 6.929687023162842, + "learning_rate": 2.1324887833201371e-07, + "loss": 0.1271, + "num_input_tokens_seen": 200704, + "step": 405 + }, + { + "epoch": 0.05411112577537284, + "grad_norm": 110.80956268310547, + "learning_rate": 2.1588809712325151e-07, + "loss": 0.1709, + "num_input_tokens_seen": 203136, + "step": 410 + }, + { + "epoch": 0.05477101755312129, + "grad_norm": 12.411187171936035, + "learning_rate": 2.1852731591448931e-07, + "loss": 0.1371, + "num_input_tokens_seen": 205504, + "step": 415 + }, + { + "epoch": 0.055430909330869735, + "grad_norm": 39.06804656982422, + "learning_rate": 2.2116653470572711e-07, + "loss": 0.2497, + "num_input_tokens_seen": 208000, + "step": 420 + }, + { + "epoch": 0.056090801108618187, + "grad_norm": 95.63874816894531, + "learning_rate": 2.238057534969649e-07, + "loss": 0.1593, + "num_input_tokens_seen": 210560, + "step": 425 + }, + { + "epoch": 0.05675069288636664, + "grad_norm": 116.45384216308594, + "learning_rate": 2.2644497228820266e-07, + "loss": 0.2053, + "num_input_tokens_seen": 213056, + "step": 430 + }, + { + "epoch": 0.05741058466411508, + "grad_norm": 88.33666229248047, + "learning_rate": 2.2908419107944046e-07, + "loss": 0.1709, + "num_input_tokens_seen": 215296, + "step": 435 + }, + { + "epoch": 0.058070476441863535, + "grad_norm": 1.3807058334350586, + "learning_rate": 2.3172340987067826e-07, + "loss": 0.0204, + "num_input_tokens_seen": 217472, + "step": 440 + }, + { + "epoch": 0.058730368219611986, + "grad_norm": 77.15796661376953, + "learning_rate": 2.3436262866191606e-07, + "loss": 0.0669, + "num_input_tokens_seen": 219968, + "step": 445 + }, + { + "epoch": 0.05939025999736043, + "grad_norm": 1.4172821044921875, + "learning_rate": 2.3700184745315386e-07, + "loss": 0.1046, + "num_input_tokens_seen": 222592, + "step": 450 + }, + { + "epoch": 0.06005015177510888, + "grad_norm": 3.821490526199341, + "learning_rate": 2.3964106624439166e-07, + "loss": 0.1579, + "num_input_tokens_seen": 224768, + "step": 455 + }, + { + "epoch": 0.060710043552857335, + "grad_norm": 3.3072474002838135, + "learning_rate": 2.4228028503562943e-07, + "loss": 0.238, + "num_input_tokens_seen": 227264, + "step": 460 + }, + { + "epoch": 0.06136993533060578, + "grad_norm": 169.94236755371094, + "learning_rate": 2.4491950382686726e-07, + "loss": 0.0656, + "num_input_tokens_seen": 229760, + "step": 465 + }, + { + "epoch": 0.06202982710835423, + "grad_norm": 45.810054779052734, + "learning_rate": 2.4755872261810503e-07, + "loss": 0.2998, + "num_input_tokens_seen": 232000, + "step": 470 + }, + { + "epoch": 0.06268971888610268, + "grad_norm": 2.529078245162964, + "learning_rate": 2.501979414093428e-07, + "loss": 0.2337, + "num_input_tokens_seen": 234176, + "step": 475 + }, + { + "epoch": 0.06334961066385113, + "grad_norm": 1.3976417779922485, + "learning_rate": 2.528371602005806e-07, + "loss": 0.1524, + "num_input_tokens_seen": 236736, + "step": 480 + }, + { + "epoch": 0.06400950244159957, + "grad_norm": 0.11078206449747086, + "learning_rate": 2.554763789918184e-07, + "loss": 0.0146, + "num_input_tokens_seen": 239104, + "step": 485 + }, + { + "epoch": 0.06466939421934803, + "grad_norm": 178.26333618164062, + "learning_rate": 2.581155977830562e-07, + "loss": 0.4542, + "num_input_tokens_seen": 241600, + "step": 490 + }, + { + "epoch": 0.06532928599709648, + "grad_norm": 228.8087158203125, + "learning_rate": 2.60754816574294e-07, + "loss": 0.5945, + "num_input_tokens_seen": 244160, + "step": 495 + }, + { + "epoch": 0.06598917777484492, + "grad_norm": 163.3988494873047, + "learning_rate": 2.633940353655318e-07, + "loss": 0.3712, + "num_input_tokens_seen": 246464, + "step": 500 + }, + { + "epoch": 0.06664906955259338, + "grad_norm": 1.6546058654785156, + "learning_rate": 2.660332541567696e-07, + "loss": 0.1548, + "num_input_tokens_seen": 248960, + "step": 505 + }, + { + "epoch": 0.06730896133034182, + "grad_norm": 94.28485870361328, + "learning_rate": 2.686724729480074e-07, + "loss": 0.1575, + "num_input_tokens_seen": 251392, + "step": 510 + }, + { + "epoch": 0.06796885310809027, + "grad_norm": 78.42328643798828, + "learning_rate": 2.7131169173924515e-07, + "loss": 0.1289, + "num_input_tokens_seen": 253888, + "step": 515 + }, + { + "epoch": 0.06862874488583873, + "grad_norm": 104.21388244628906, + "learning_rate": 2.73950910530483e-07, + "loss": 0.1912, + "num_input_tokens_seen": 256384, + "step": 520 + }, + { + "epoch": 0.06928863666358717, + "grad_norm": 99.578369140625, + "learning_rate": 2.7659012932172075e-07, + "loss": 0.2614, + "num_input_tokens_seen": 258496, + "step": 525 + }, + { + "epoch": 0.06994852844133562, + "grad_norm": 0.4172161817550659, + "learning_rate": 2.792293481129586e-07, + "loss": 0.1036, + "num_input_tokens_seen": 260864, + "step": 530 + }, + { + "epoch": 0.07060842021908408, + "grad_norm": 98.31641387939453, + "learning_rate": 2.8186856690419635e-07, + "loss": 0.0465, + "num_input_tokens_seen": 263360, + "step": 535 + }, + { + "epoch": 0.07126831199683252, + "grad_norm": 12.86385440826416, + "learning_rate": 2.845077856954342e-07, + "loss": 0.097, + "num_input_tokens_seen": 266112, + "step": 540 + }, + { + "epoch": 0.07192820377458096, + "grad_norm": 103.72950744628906, + "learning_rate": 2.8714700448667195e-07, + "loss": 0.1353, + "num_input_tokens_seen": 268544, + "step": 545 + }, + { + "epoch": 0.07258809555232942, + "grad_norm": 1.9934414625167847, + "learning_rate": 2.897862232779097e-07, + "loss": 0.2927, + "num_input_tokens_seen": 270912, + "step": 550 + }, + { + "epoch": 0.07324798733007787, + "grad_norm": 89.35478210449219, + "learning_rate": 2.9242544206914755e-07, + "loss": 0.042, + "num_input_tokens_seen": 273664, + "step": 555 + }, + { + "epoch": 0.07390787910782631, + "grad_norm": 51.353797912597656, + "learning_rate": 2.950646608603853e-07, + "loss": 0.1713, + "num_input_tokens_seen": 276160, + "step": 560 + }, + { + "epoch": 0.07456777088557477, + "grad_norm": 67.62535095214844, + "learning_rate": 2.977038796516231e-07, + "loss": 0.1153, + "num_input_tokens_seen": 278784, + "step": 565 + }, + { + "epoch": 0.07522766266332322, + "grad_norm": 61.236839294433594, + "learning_rate": 3.0034309844286087e-07, + "loss": 0.1525, + "num_input_tokens_seen": 281152, + "step": 570 + }, + { + "epoch": 0.07588755444107166, + "grad_norm": 91.33195495605469, + "learning_rate": 3.029823172340987e-07, + "loss": 0.2213, + "num_input_tokens_seen": 283456, + "step": 575 + }, + { + "epoch": 0.07654744621882012, + "grad_norm": 77.89016723632812, + "learning_rate": 3.0562153602533647e-07, + "loss": 0.1509, + "num_input_tokens_seen": 286016, + "step": 580 + }, + { + "epoch": 0.07720733799656856, + "grad_norm": 36.07801818847656, + "learning_rate": 3.0826075481657424e-07, + "loss": 0.1542, + "num_input_tokens_seen": 288448, + "step": 585 + }, + { + "epoch": 0.07786722977431701, + "grad_norm": 8.650485038757324, + "learning_rate": 3.1089997360781207e-07, + "loss": 0.136, + "num_input_tokens_seen": 290816, + "step": 590 + }, + { + "epoch": 0.07852712155206547, + "grad_norm": 87.19075775146484, + "learning_rate": 3.1353919239904984e-07, + "loss": 0.0768, + "num_input_tokens_seen": 293504, + "step": 595 + }, + { + "epoch": 0.07918701332981391, + "grad_norm": 87.9682388305664, + "learning_rate": 3.1617841119028767e-07, + "loss": 0.1251, + "num_input_tokens_seen": 295744, + "step": 600 + }, + { + "epoch": 0.07984690510756236, + "grad_norm": 22.17746925354004, + "learning_rate": 3.1881762998152544e-07, + "loss": 0.1154, + "num_input_tokens_seen": 298112, + "step": 605 + }, + { + "epoch": 0.0805067968853108, + "grad_norm": 110.7480239868164, + "learning_rate": 3.2145684877276327e-07, + "loss": 0.1348, + "num_input_tokens_seen": 300608, + "step": 610 + }, + { + "epoch": 0.08116668866305926, + "grad_norm": 0.6900279521942139, + "learning_rate": 3.2409606756400104e-07, + "loss": 0.0238, + "num_input_tokens_seen": 303360, + "step": 615 + }, + { + "epoch": 0.0818265804408077, + "grad_norm": 5.3087968826293945, + "learning_rate": 3.267352863552388e-07, + "loss": 0.1818, + "num_input_tokens_seen": 305920, + "step": 620 + }, + { + "epoch": 0.08248647221855615, + "grad_norm": 3.0559468269348145, + "learning_rate": 3.2937450514647664e-07, + "loss": 0.1448, + "num_input_tokens_seen": 308416, + "step": 625 + }, + { + "epoch": 0.08314636399630461, + "grad_norm": 2.740339756011963, + "learning_rate": 3.320137239377144e-07, + "loss": 0.0859, + "num_input_tokens_seen": 310848, + "step": 630 + }, + { + "epoch": 0.08380625577405305, + "grad_norm": 76.87493133544922, + "learning_rate": 3.3465294272895224e-07, + "loss": 0.1894, + "num_input_tokens_seen": 313408, + "step": 635 + }, + { + "epoch": 0.0844661475518015, + "grad_norm": 5.294236183166504, + "learning_rate": 3.3729216152019e-07, + "loss": 0.2527, + "num_input_tokens_seen": 315904, + "step": 640 + }, + { + "epoch": 0.08512603932954996, + "grad_norm": 150.8271026611328, + "learning_rate": 3.3993138031142784e-07, + "loss": 0.2999, + "num_input_tokens_seen": 318080, + "step": 645 + }, + { + "epoch": 0.0857859311072984, + "grad_norm": 68.92874908447266, + "learning_rate": 3.425705991026656e-07, + "loss": 0.1142, + "num_input_tokens_seen": 320256, + "step": 650 + }, + { + "epoch": 0.08644582288504685, + "grad_norm": 1.1438566446304321, + "learning_rate": 3.452098178939034e-07, + "loss": 0.0054, + "num_input_tokens_seen": 322496, + "step": 655 + }, + { + "epoch": 0.0871057146627953, + "grad_norm": 144.06700134277344, + "learning_rate": 3.478490366851412e-07, + "loss": 0.133, + "num_input_tokens_seen": 325120, + "step": 660 + }, + { + "epoch": 0.08776560644054375, + "grad_norm": 199.6498565673828, + "learning_rate": 3.50488255476379e-07, + "loss": 0.0938, + "num_input_tokens_seen": 327296, + "step": 665 + }, + { + "epoch": 0.0884254982182922, + "grad_norm": 106.60006713867188, + "learning_rate": 3.5312747426761676e-07, + "loss": 0.492, + "num_input_tokens_seen": 329664, + "step": 670 + }, + { + "epoch": 0.08908538999604065, + "grad_norm": 58.48418426513672, + "learning_rate": 3.5576669305885453e-07, + "loss": 0.2004, + "num_input_tokens_seen": 332416, + "step": 675 + }, + { + "epoch": 0.0897452817737891, + "grad_norm": 12.844401359558105, + "learning_rate": 3.5840591185009236e-07, + "loss": 0.0255, + "num_input_tokens_seen": 334976, + "step": 680 + }, + { + "epoch": 0.09040517355153754, + "grad_norm": 13.640639305114746, + "learning_rate": 3.6104513064133013e-07, + "loss": 0.1744, + "num_input_tokens_seen": 337472, + "step": 685 + }, + { + "epoch": 0.091065065329286, + "grad_norm": 23.306495666503906, + "learning_rate": 3.636843494325679e-07, + "loss": 0.0702, + "num_input_tokens_seen": 339904, + "step": 690 + }, + { + "epoch": 0.09172495710703445, + "grad_norm": 0.2791350781917572, + "learning_rate": 3.6632356822380573e-07, + "loss": 0.1571, + "num_input_tokens_seen": 342272, + "step": 695 + }, + { + "epoch": 0.09238484888478289, + "grad_norm": 47.02802276611328, + "learning_rate": 3.689627870150435e-07, + "loss": 0.0945, + "num_input_tokens_seen": 344640, + "step": 700 + }, + { + "epoch": 0.09304474066253135, + "grad_norm": 0.08177275210618973, + "learning_rate": 3.7160200580628133e-07, + "loss": 0.1118, + "num_input_tokens_seen": 347072, + "step": 705 + }, + { + "epoch": 0.0937046324402798, + "grad_norm": 0.10263694822788239, + "learning_rate": 3.742412245975191e-07, + "loss": 0.1896, + "num_input_tokens_seen": 349696, + "step": 710 + }, + { + "epoch": 0.09436452421802824, + "grad_norm": 167.6782684326172, + "learning_rate": 3.7688044338875693e-07, + "loss": 0.1602, + "num_input_tokens_seen": 352256, + "step": 715 + }, + { + "epoch": 0.0950244159957767, + "grad_norm": 0.09177152812480927, + "learning_rate": 3.795196621799947e-07, + "loss": 0.2044, + "num_input_tokens_seen": 355008, + "step": 720 + }, + { + "epoch": 0.09568430777352514, + "grad_norm": 79.8562240600586, + "learning_rate": 3.821588809712325e-07, + "loss": 0.4465, + "num_input_tokens_seen": 357376, + "step": 725 + }, + { + "epoch": 0.09634419955127359, + "grad_norm": 131.0872344970703, + "learning_rate": 3.847980997624703e-07, + "loss": 0.2807, + "num_input_tokens_seen": 359680, + "step": 730 + }, + { + "epoch": 0.09700409132902205, + "grad_norm": 64.67192840576172, + "learning_rate": 3.874373185537081e-07, + "loss": 0.0482, + "num_input_tokens_seen": 362176, + "step": 735 + }, + { + "epoch": 0.09766398310677049, + "grad_norm": 1.6336522102355957, + "learning_rate": 3.900765373449459e-07, + "loss": 0.0361, + "num_input_tokens_seen": 365056, + "step": 740 + }, + { + "epoch": 0.09832387488451894, + "grad_norm": 3.4322988986968994, + "learning_rate": 3.927157561361837e-07, + "loss": 0.1913, + "num_input_tokens_seen": 367488, + "step": 745 + }, + { + "epoch": 0.0989837666622674, + "grad_norm": 156.1293487548828, + "learning_rate": 3.953549749274215e-07, + "loss": 0.2768, + "num_input_tokens_seen": 369792, + "step": 750 + }, + { + "epoch": 0.09964365844001584, + "grad_norm": 12.225781440734863, + "learning_rate": 3.979941937186593e-07, + "loss": 0.0962, + "num_input_tokens_seen": 372160, + "step": 755 + }, + { + "epoch": 0.10030355021776428, + "grad_norm": 66.30107116699219, + "learning_rate": 4.0063341250989705e-07, + "loss": 0.1613, + "num_input_tokens_seen": 374272, + "step": 760 + }, + { + "epoch": 0.10096344199551274, + "grad_norm": 156.31951904296875, + "learning_rate": 4.032726313011349e-07, + "loss": 0.1893, + "num_input_tokens_seen": 376704, + "step": 765 + }, + { + "epoch": 0.10162333377326119, + "grad_norm": 221.8079071044922, + "learning_rate": 4.0591185009237265e-07, + "loss": 0.321, + "num_input_tokens_seen": 379136, + "step": 770 + }, + { + "epoch": 0.10228322555100963, + "grad_norm": 53.741859436035156, + "learning_rate": 4.085510688836104e-07, + "loss": 0.0868, + "num_input_tokens_seen": 381568, + "step": 775 + }, + { + "epoch": 0.10294311732875808, + "grad_norm": 2.2501022815704346, + "learning_rate": 4.111902876748482e-07, + "loss": 0.2393, + "num_input_tokens_seen": 384128, + "step": 780 + }, + { + "epoch": 0.10360300910650654, + "grad_norm": 64.6779556274414, + "learning_rate": 4.13829506466086e-07, + "loss": 0.3035, + "num_input_tokens_seen": 386304, + "step": 785 + }, + { + "epoch": 0.10426290088425498, + "grad_norm": 149.44509887695312, + "learning_rate": 4.164687252573238e-07, + "loss": 0.2872, + "num_input_tokens_seen": 388864, + "step": 790 + }, + { + "epoch": 0.10492279266200343, + "grad_norm": 40.67619323730469, + "learning_rate": 4.1910794404856157e-07, + "loss": 0.0753, + "num_input_tokens_seen": 391104, + "step": 795 + }, + { + "epoch": 0.10558268443975188, + "grad_norm": 43.18121337890625, + "learning_rate": 4.217471628397994e-07, + "loss": 0.1116, + "num_input_tokens_seen": 393856, + "step": 800 + }, + { + "epoch": 0.10624257621750033, + "grad_norm": 51.915523529052734, + "learning_rate": 4.2438638163103717e-07, + "loss": 0.1934, + "num_input_tokens_seen": 396352, + "step": 805 + }, + { + "epoch": 0.10690246799524877, + "grad_norm": 2.390080451965332, + "learning_rate": 4.27025600422275e-07, + "loss": 0.1459, + "num_input_tokens_seen": 398592, + "step": 810 + }, + { + "epoch": 0.10756235977299723, + "grad_norm": 87.79183197021484, + "learning_rate": 4.2966481921351277e-07, + "loss": 0.0887, + "num_input_tokens_seen": 401088, + "step": 815 + }, + { + "epoch": 0.10822225155074568, + "grad_norm": 17.97756004333496, + "learning_rate": 4.323040380047506e-07, + "loss": 0.0521, + "num_input_tokens_seen": 403456, + "step": 820 + }, + { + "epoch": 0.10888214332849412, + "grad_norm": 30.060583114624023, + "learning_rate": 4.3494325679598837e-07, + "loss": 0.2846, + "num_input_tokens_seen": 405696, + "step": 825 + }, + { + "epoch": 0.10954203510624258, + "grad_norm": 58.16769790649414, + "learning_rate": 4.3758247558722614e-07, + "loss": 0.2486, + "num_input_tokens_seen": 408128, + "step": 830 + }, + { + "epoch": 0.11020192688399102, + "grad_norm": 2.298959493637085, + "learning_rate": 4.4022169437846397e-07, + "loss": 0.0355, + "num_input_tokens_seen": 411008, + "step": 835 + }, + { + "epoch": 0.11086181866173947, + "grad_norm": 4.175866603851318, + "learning_rate": 4.4286091316970174e-07, + "loss": 0.3422, + "num_input_tokens_seen": 413312, + "step": 840 + }, + { + "epoch": 0.11152171043948793, + "grad_norm": 0.37670063972473145, + "learning_rate": 4.4550013196093957e-07, + "loss": 0.124, + "num_input_tokens_seen": 415616, + "step": 845 + }, + { + "epoch": 0.11218160221723637, + "grad_norm": 105.30368041992188, + "learning_rate": 4.4813935075217734e-07, + "loss": 0.1853, + "num_input_tokens_seen": 418368, + "step": 850 + }, + { + "epoch": 0.11284149399498482, + "grad_norm": 3.088155508041382, + "learning_rate": 4.5077856954341517e-07, + "loss": 0.0833, + "num_input_tokens_seen": 420864, + "step": 855 + }, + { + "epoch": 0.11350138577273328, + "grad_norm": 196.9340057373047, + "learning_rate": 4.5341778833465294e-07, + "loss": 0.222, + "num_input_tokens_seen": 423040, + "step": 860 + }, + { + "epoch": 0.11416127755048172, + "grad_norm": 122.30973052978516, + "learning_rate": 4.560570071258907e-07, + "loss": 0.3516, + "num_input_tokens_seen": 425344, + "step": 865 + }, + { + "epoch": 0.11482116932823017, + "grad_norm": 5.073932647705078, + "learning_rate": 4.5869622591712854e-07, + "loss": 0.1296, + "num_input_tokens_seen": 427968, + "step": 870 + }, + { + "epoch": 0.11548106110597862, + "grad_norm": 181.66859436035156, + "learning_rate": 4.613354447083663e-07, + "loss": 0.232, + "num_input_tokens_seen": 430592, + "step": 875 + }, + { + "epoch": 0.11614095288372707, + "grad_norm": 82.50604248046875, + "learning_rate": 4.639746634996041e-07, + "loss": 0.4212, + "num_input_tokens_seen": 433152, + "step": 880 + }, + { + "epoch": 0.11680084466147551, + "grad_norm": 102.42996215820312, + "learning_rate": 4.6661388229084186e-07, + "loss": 0.0185, + "num_input_tokens_seen": 435456, + "step": 885 + }, + { + "epoch": 0.11746073643922397, + "grad_norm": 161.98501586914062, + "learning_rate": 4.692531010820797e-07, + "loss": 0.2316, + "num_input_tokens_seen": 438080, + "step": 890 + }, + { + "epoch": 0.11812062821697242, + "grad_norm": 184.88845825195312, + "learning_rate": 4.7189231987331746e-07, + "loss": 0.2135, + "num_input_tokens_seen": 440640, + "step": 895 + }, + { + "epoch": 0.11878051999472086, + "grad_norm": 3.3446881771087646, + "learning_rate": 4.7453153866455523e-07, + "loss": 0.0402, + "num_input_tokens_seen": 443008, + "step": 900 + }, + { + "epoch": 0.11944041177246932, + "grad_norm": 59.780128479003906, + "learning_rate": 4.771707574557931e-07, + "loss": 0.1973, + "num_input_tokens_seen": 445312, + "step": 905 + }, + { + "epoch": 0.12010030355021777, + "grad_norm": 0.7624008059501648, + "learning_rate": 4.798099762470308e-07, + "loss": 0.105, + "num_input_tokens_seen": 447680, + "step": 910 + }, + { + "epoch": 0.12076019532796621, + "grad_norm": 3.7067677974700928, + "learning_rate": 4.824491950382686e-07, + "loss": 0.1353, + "num_input_tokens_seen": 450368, + "step": 915 + }, + { + "epoch": 0.12142008710571467, + "grad_norm": 5.35090446472168, + "learning_rate": 4.850884138295065e-07, + "loss": 0.0864, + "num_input_tokens_seen": 452800, + "step": 920 + }, + { + "epoch": 0.12207997888346311, + "grad_norm": 1.3662978410720825, + "learning_rate": 4.877276326207443e-07, + "loss": 0.1168, + "num_input_tokens_seen": 455744, + "step": 925 + }, + { + "epoch": 0.12273987066121156, + "grad_norm": 17.259654998779297, + "learning_rate": 4.90366851411982e-07, + "loss": 0.1899, + "num_input_tokens_seen": 458176, + "step": 930 + }, + { + "epoch": 0.12339976243896002, + "grad_norm": 0.9647508859634399, + "learning_rate": 4.930060702032198e-07, + "loss": 0.1902, + "num_input_tokens_seen": 460480, + "step": 935 + }, + { + "epoch": 0.12405965421670846, + "grad_norm": 6.764614105224609, + "learning_rate": 4.956452889944576e-07, + "loss": 0.169, + "num_input_tokens_seen": 462656, + "step": 940 + }, + { + "epoch": 0.1247195459944569, + "grad_norm": 391.0478210449219, + "learning_rate": 4.982845077856955e-07, + "loss": 0.2648, + "num_input_tokens_seen": 465344, + "step": 945 + }, + { + "epoch": 0.12537943777220537, + "grad_norm": 194.13442993164062, + "learning_rate": 5.009237265769331e-07, + "loss": 0.3667, + "num_input_tokens_seen": 467712, + "step": 950 + }, + { + "epoch": 0.1260393295499538, + "grad_norm": 101.93359375, + "learning_rate": 5.03562945368171e-07, + "loss": 0.2885, + "num_input_tokens_seen": 470080, + "step": 955 + }, + { + "epoch": 0.12669922132770225, + "grad_norm": 319.10693359375, + "learning_rate": 5.062021641594088e-07, + "loss": 0.1084, + "num_input_tokens_seen": 472512, + "step": 960 + }, + { + "epoch": 0.1273591131054507, + "grad_norm": 136.6356658935547, + "learning_rate": 5.088413829506465e-07, + "loss": 0.1628, + "num_input_tokens_seen": 474752, + "step": 965 + }, + { + "epoch": 0.12801900488319914, + "grad_norm": 256.36505126953125, + "learning_rate": 5.114806017418843e-07, + "loss": 0.0777, + "num_input_tokens_seen": 477440, + "step": 970 + }, + { + "epoch": 0.12867889666094762, + "grad_norm": 38.02722930908203, + "learning_rate": 5.141198205331222e-07, + "loss": 0.1169, + "num_input_tokens_seen": 480000, + "step": 975 + }, + { + "epoch": 0.12933878843869606, + "grad_norm": 97.68553924560547, + "learning_rate": 5.1675903932436e-07, + "loss": 0.2261, + "num_input_tokens_seen": 482368, + "step": 980 + }, + { + "epoch": 0.1299986802164445, + "grad_norm": 0.6980463862419128, + "learning_rate": 5.193982581155977e-07, + "loss": 0.0063, + "num_input_tokens_seen": 484544, + "step": 985 + }, + { + "epoch": 0.13065857199419295, + "grad_norm": 2.7004430294036865, + "learning_rate": 5.220374769068355e-07, + "loss": 0.1087, + "num_input_tokens_seen": 486720, + "step": 990 + }, + { + "epoch": 0.1313184637719414, + "grad_norm": 0.10351748764514923, + "learning_rate": 5.246766956980734e-07, + "loss": 0.0935, + "num_input_tokens_seen": 489344, + "step": 995 + }, + { + "epoch": 0.13197835554968984, + "grad_norm": 3.9759552478790283, + "learning_rate": 5.273159144893111e-07, + "loss": 0.0785, + "num_input_tokens_seen": 491776, + "step": 1000 + }, + { + "epoch": 0.1326382473274383, + "grad_norm": 405.497802734375, + "learning_rate": 5.29955133280549e-07, + "loss": 0.1646, + "num_input_tokens_seen": 493952, + "step": 1005 + }, + { + "epoch": 0.13329813910518676, + "grad_norm": 48.91209411621094, + "learning_rate": 5.325943520717867e-07, + "loss": 0.2601, + "num_input_tokens_seen": 496320, + "step": 1010 + }, + { + "epoch": 0.1339580308829352, + "grad_norm": 435.2791748046875, + "learning_rate": 5.352335708630246e-07, + "loss": 0.075, + "num_input_tokens_seen": 498496, + "step": 1015 + }, + { + "epoch": 0.13461792266068365, + "grad_norm": 58.02470397949219, + "learning_rate": 5.378727896542623e-07, + "loss": 0.3792, + "num_input_tokens_seen": 501056, + "step": 1020 + }, + { + "epoch": 0.1352778144384321, + "grad_norm": 19.976274490356445, + "learning_rate": 5.405120084455001e-07, + "loss": 0.0633, + "num_input_tokens_seen": 503296, + "step": 1025 + }, + { + "epoch": 0.13593770621618054, + "grad_norm": 36.125999450683594, + "learning_rate": 5.431512272367379e-07, + "loss": 0.2425, + "num_input_tokens_seen": 505856, + "step": 1030 + }, + { + "epoch": 0.13659759799392898, + "grad_norm": 4.3010334968566895, + "learning_rate": 5.457904460279758e-07, + "loss": 0.2077, + "num_input_tokens_seen": 508416, + "step": 1035 + }, + { + "epoch": 0.13725748977167745, + "grad_norm": 1.1001554727554321, + "learning_rate": 5.484296648192135e-07, + "loss": 0.1821, + "num_input_tokens_seen": 511040, + "step": 1040 + }, + { + "epoch": 0.1379173815494259, + "grad_norm": 1.0026289224624634, + "learning_rate": 5.510688836104512e-07, + "loss": 0.0126, + "num_input_tokens_seen": 513216, + "step": 1045 + }, + { + "epoch": 0.13857727332717434, + "grad_norm": 310.48272705078125, + "learning_rate": 5.537081024016891e-07, + "loss": 0.2747, + "num_input_tokens_seen": 515456, + "step": 1050 + }, + { + "epoch": 0.1392371651049228, + "grad_norm": 0.2722547948360443, + "learning_rate": 5.563473211929268e-07, + "loss": 0.2289, + "num_input_tokens_seen": 517824, + "step": 1055 + }, + { + "epoch": 0.13989705688267123, + "grad_norm": 9.799527168273926, + "learning_rate": 5.589865399841647e-07, + "loss": 0.1583, + "num_input_tokens_seen": 520320, + "step": 1060 + }, + { + "epoch": 0.14055694866041968, + "grad_norm": 153.0984344482422, + "learning_rate": 5.616257587754024e-07, + "loss": 0.0417, + "num_input_tokens_seen": 522752, + "step": 1065 + }, + { + "epoch": 0.14121684043816815, + "grad_norm": 58.452980041503906, + "learning_rate": 5.642649775666402e-07, + "loss": 0.0679, + "num_input_tokens_seen": 525376, + "step": 1070 + }, + { + "epoch": 0.1418767322159166, + "grad_norm": 1.3527008295059204, + "learning_rate": 5.66904196357878e-07, + "loss": 0.2223, + "num_input_tokens_seen": 527808, + "step": 1075 + }, + { + "epoch": 0.14253662399366504, + "grad_norm": 0.029463879764080048, + "learning_rate": 5.695434151491159e-07, + "loss": 0.1876, + "num_input_tokens_seen": 530304, + "step": 1080 + }, + { + "epoch": 0.14319651577141349, + "grad_norm": 30.274728775024414, + "learning_rate": 5.721826339403536e-07, + "loss": 0.375, + "num_input_tokens_seen": 532480, + "step": 1085 + }, + { + "epoch": 0.14385640754916193, + "grad_norm": 57.1826171875, + "learning_rate": 5.748218527315914e-07, + "loss": 0.2028, + "num_input_tokens_seen": 535168, + "step": 1090 + }, + { + "epoch": 0.14451629932691037, + "grad_norm": 52.93345260620117, + "learning_rate": 5.774610715228292e-07, + "loss": 0.3046, + "num_input_tokens_seen": 537600, + "step": 1095 + }, + { + "epoch": 0.14517619110465885, + "grad_norm": 131.98606872558594, + "learning_rate": 5.801002903140671e-07, + "loss": 0.043, + "num_input_tokens_seen": 540096, + "step": 1100 + }, + { + "epoch": 0.1458360828824073, + "grad_norm": 1.4051055908203125, + "learning_rate": 5.827395091053047e-07, + "loss": 0.0119, + "num_input_tokens_seen": 542592, + "step": 1105 + }, + { + "epoch": 0.14649597466015574, + "grad_norm": 0.07283133268356323, + "learning_rate": 5.853787278965426e-07, + "loss": 0.0392, + "num_input_tokens_seen": 544960, + "step": 1110 + }, + { + "epoch": 0.14715586643790418, + "grad_norm": 121.37798309326172, + "learning_rate": 5.880179466877804e-07, + "loss": 0.0851, + "num_input_tokens_seen": 547136, + "step": 1115 + }, + { + "epoch": 0.14781575821565263, + "grad_norm": 138.19085693359375, + "learning_rate": 5.906571654790183e-07, + "loss": 0.1365, + "num_input_tokens_seen": 549440, + "step": 1120 + }, + { + "epoch": 0.14847564999340107, + "grad_norm": 78.11005401611328, + "learning_rate": 5.932963842702559e-07, + "loss": 0.2707, + "num_input_tokens_seen": 552064, + "step": 1125 + }, + { + "epoch": 0.14913554177114954, + "grad_norm": 31.630468368530273, + "learning_rate": 5.959356030614938e-07, + "loss": 0.0997, + "num_input_tokens_seen": 554432, + "step": 1130 + }, + { + "epoch": 0.149795433548898, + "grad_norm": 3.8265812397003174, + "learning_rate": 5.985748218527316e-07, + "loss": 0.0028, + "num_input_tokens_seen": 556800, + "step": 1135 + }, + { + "epoch": 0.15045532532664643, + "grad_norm": 1.7065273523330688, + "learning_rate": 6.012140406439695e-07, + "loss": 0.2855, + "num_input_tokens_seen": 559296, + "step": 1140 + }, + { + "epoch": 0.15111521710439488, + "grad_norm": 39.60039138793945, + "learning_rate": 6.038532594352071e-07, + "loss": 0.1093, + "num_input_tokens_seen": 562112, + "step": 1145 + }, + { + "epoch": 0.15177510888214332, + "grad_norm": 0.6826571822166443, + "learning_rate": 6.064924782264449e-07, + "loss": 0.0592, + "num_input_tokens_seen": 564288, + "step": 1150 + }, + { + "epoch": 0.15243500065989177, + "grad_norm": 120.91127014160156, + "learning_rate": 6.091316970176828e-07, + "loss": 0.188, + "num_input_tokens_seen": 566848, + "step": 1155 + }, + { + "epoch": 0.15309489243764024, + "grad_norm": 98.7718505859375, + "learning_rate": 6.117709158089205e-07, + "loss": 0.4772, + "num_input_tokens_seen": 569152, + "step": 1160 + }, + { + "epoch": 0.15375478421538868, + "grad_norm": 94.5262222290039, + "learning_rate": 6.144101346001583e-07, + "loss": 0.0848, + "num_input_tokens_seen": 571456, + "step": 1165 + }, + { + "epoch": 0.15441467599313713, + "grad_norm": 0.14129430055618286, + "learning_rate": 6.170493533913961e-07, + "loss": 0.2235, + "num_input_tokens_seen": 573824, + "step": 1170 + }, + { + "epoch": 0.15507456777088557, + "grad_norm": 206.35865783691406, + "learning_rate": 6.196885721826339e-07, + "loss": 0.0517, + "num_input_tokens_seen": 576128, + "step": 1175 + }, + { + "epoch": 0.15573445954863402, + "grad_norm": 0.3648551106452942, + "learning_rate": 6.223277909738716e-07, + "loss": 0.1252, + "num_input_tokens_seen": 578432, + "step": 1180 + }, + { + "epoch": 0.15639435132638246, + "grad_norm": 0.18024662137031555, + "learning_rate": 6.249670097651095e-07, + "loss": 0.2063, + "num_input_tokens_seen": 580736, + "step": 1185 + }, + { + "epoch": 0.15705424310413094, + "grad_norm": 9.043709754943848, + "learning_rate": 6.276062285563473e-07, + "loss": 0.0011, + "num_input_tokens_seen": 583040, + "step": 1190 + }, + { + "epoch": 0.15771413488187938, + "grad_norm": 0.12227041274309158, + "learning_rate": 6.302454473475851e-07, + "loss": 0.0065, + "num_input_tokens_seen": 585344, + "step": 1195 + }, + { + "epoch": 0.15837402665962783, + "grad_norm": 76.58000183105469, + "learning_rate": 6.328846661388228e-07, + "loss": 0.2037, + "num_input_tokens_seen": 587904, + "step": 1200 + }, + { + "epoch": 0.15903391843737627, + "grad_norm": 0.9080504179000854, + "learning_rate": 6.355238849300607e-07, + "loss": 0.2007, + "num_input_tokens_seen": 590208, + "step": 1205 + }, + { + "epoch": 0.15969381021512472, + "grad_norm": 152.57972717285156, + "learning_rate": 6.381631037212984e-07, + "loss": 0.1552, + "num_input_tokens_seen": 592512, + "step": 1210 + }, + { + "epoch": 0.16035370199287316, + "grad_norm": 0.8822894096374512, + "learning_rate": 6.408023225125363e-07, + "loss": 0.2204, + "num_input_tokens_seen": 595072, + "step": 1215 + }, + { + "epoch": 0.1610135937706216, + "grad_norm": 294.2984924316406, + "learning_rate": 6.43441541303774e-07, + "loss": 0.2003, + "num_input_tokens_seen": 597632, + "step": 1220 + }, + { + "epoch": 0.16167348554837008, + "grad_norm": 0.054259005934000015, + "learning_rate": 6.460807600950119e-07, + "loss": 0.0941, + "num_input_tokens_seen": 600192, + "step": 1225 + }, + { + "epoch": 0.16233337732611852, + "grad_norm": 0.06157355755567551, + "learning_rate": 6.487199788862496e-07, + "loss": 0.2021, + "num_input_tokens_seen": 602304, + "step": 1230 + }, + { + "epoch": 0.16299326910386697, + "grad_norm": 391.3377990722656, + "learning_rate": 6.513591976774875e-07, + "loss": 0.2325, + "num_input_tokens_seen": 604608, + "step": 1235 + }, + { + "epoch": 0.1636531608816154, + "grad_norm": 27.769588470458984, + "learning_rate": 6.539984164687252e-07, + "loss": 0.1003, + "num_input_tokens_seen": 606976, + "step": 1240 + }, + { + "epoch": 0.16431305265936386, + "grad_norm": 0.0755537897348404, + "learning_rate": 6.566376352599631e-07, + "loss": 0.0153, + "num_input_tokens_seen": 609600, + "step": 1245 + }, + { + "epoch": 0.1649729444371123, + "grad_norm": 177.3814697265625, + "learning_rate": 6.592768540512008e-07, + "loss": 0.2758, + "num_input_tokens_seen": 611840, + "step": 1250 + }, + { + "epoch": 0.16563283621486077, + "grad_norm": 12.570916175842285, + "learning_rate": 6.619160728424386e-07, + "loss": 0.1107, + "num_input_tokens_seen": 614528, + "step": 1255 + }, + { + "epoch": 0.16629272799260922, + "grad_norm": 0.11696495860815048, + "learning_rate": 6.645552916336764e-07, + "loss": 0.0118, + "num_input_tokens_seen": 617024, + "step": 1260 + }, + { + "epoch": 0.16695261977035766, + "grad_norm": 40.369667053222656, + "learning_rate": 6.671945104249141e-07, + "loss": 0.156, + "num_input_tokens_seen": 619392, + "step": 1265 + }, + { + "epoch": 0.1676125115481061, + "grad_norm": 67.77987670898438, + "learning_rate": 6.69833729216152e-07, + "loss": 0.179, + "num_input_tokens_seen": 621888, + "step": 1270 + }, + { + "epoch": 0.16827240332585455, + "grad_norm": 166.91651916503906, + "learning_rate": 6.724729480073898e-07, + "loss": 0.1028, + "num_input_tokens_seen": 624192, + "step": 1275 + }, + { + "epoch": 0.168932295103603, + "grad_norm": 0.7046058177947998, + "learning_rate": 6.751121667986275e-07, + "loss": 0.0813, + "num_input_tokens_seen": 626624, + "step": 1280 + }, + { + "epoch": 0.16959218688135147, + "grad_norm": 60.36655044555664, + "learning_rate": 6.777513855898653e-07, + "loss": 0.0655, + "num_input_tokens_seen": 628928, + "step": 1285 + }, + { + "epoch": 0.17025207865909991, + "grad_norm": 0.3693448007106781, + "learning_rate": 6.803906043811032e-07, + "loss": 0.1142, + "num_input_tokens_seen": 631616, + "step": 1290 + }, + { + "epoch": 0.17091197043684836, + "grad_norm": 0.0590805858373642, + "learning_rate": 6.83029823172341e-07, + "loss": 0.045, + "num_input_tokens_seen": 634112, + "step": 1295 + }, + { + "epoch": 0.1715718622145968, + "grad_norm": 261.4384460449219, + "learning_rate": 6.856690419635787e-07, + "loss": 0.2547, + "num_input_tokens_seen": 636416, + "step": 1300 + }, + { + "epoch": 0.17223175399234525, + "grad_norm": 0.21970777213573456, + "learning_rate": 6.883082607548165e-07, + "loss": 0.0028, + "num_input_tokens_seen": 638848, + "step": 1305 + }, + { + "epoch": 0.1728916457700937, + "grad_norm": 170.5442352294922, + "learning_rate": 6.909474795460544e-07, + "loss": 0.0734, + "num_input_tokens_seen": 641216, + "step": 1310 + }, + { + "epoch": 0.17355153754784217, + "grad_norm": 0.15614676475524902, + "learning_rate": 6.935866983372921e-07, + "loss": 0.0042, + "num_input_tokens_seen": 644032, + "step": 1315 + }, + { + "epoch": 0.1742114293255906, + "grad_norm": 0.9323055744171143, + "learning_rate": 6.962259171285299e-07, + "loss": 0.1632, + "num_input_tokens_seen": 646528, + "step": 1320 + }, + { + "epoch": 0.17487132110333906, + "grad_norm": 0.03342974931001663, + "learning_rate": 6.988651359197677e-07, + "loss": 0.1357, + "num_input_tokens_seen": 649088, + "step": 1325 + }, + { + "epoch": 0.1755312128810875, + "grad_norm": 0.4350726008415222, + "learning_rate": 7.015043547110056e-07, + "loss": 0.0358, + "num_input_tokens_seen": 651520, + "step": 1330 + }, + { + "epoch": 0.17619110465883595, + "grad_norm": 151.69903564453125, + "learning_rate": 7.041435735022433e-07, + "loss": 0.1195, + "num_input_tokens_seen": 654016, + "step": 1335 + }, + { + "epoch": 0.1768509964365844, + "grad_norm": 0.02422684244811535, + "learning_rate": 7.067827922934811e-07, + "loss": 0.0002, + "num_input_tokens_seen": 656320, + "step": 1340 + }, + { + "epoch": 0.17751088821433286, + "grad_norm": 186.25314331054688, + "learning_rate": 7.094220110847189e-07, + "loss": 0.2259, + "num_input_tokens_seen": 658880, + "step": 1345 + }, + { + "epoch": 0.1781707799920813, + "grad_norm": 0.054249465465545654, + "learning_rate": 7.120612298759568e-07, + "loss": 0.0003, + "num_input_tokens_seen": 661184, + "step": 1350 + }, + { + "epoch": 0.17883067176982975, + "grad_norm": 0.8555107116699219, + "learning_rate": 7.147004486671945e-07, + "loss": 0.0801, + "num_input_tokens_seen": 663680, + "step": 1355 + }, + { + "epoch": 0.1794905635475782, + "grad_norm": 40.881561279296875, + "learning_rate": 7.173396674584322e-07, + "loss": 0.0336, + "num_input_tokens_seen": 666048, + "step": 1360 + }, + { + "epoch": 0.18015045532532664, + "grad_norm": 196.4837188720703, + "learning_rate": 7.199788862496701e-07, + "loss": 0.1074, + "num_input_tokens_seen": 668480, + "step": 1365 + }, + { + "epoch": 0.1808103471030751, + "grad_norm": 0.763932466506958, + "learning_rate": 7.226181050409078e-07, + "loss": 0.2056, + "num_input_tokens_seen": 671488, + "step": 1370 + }, + { + "epoch": 0.18147023888082353, + "grad_norm": 0.08814774453639984, + "learning_rate": 7.252573238321457e-07, + "loss": 0.1112, + "num_input_tokens_seen": 673920, + "step": 1375 + }, + { + "epoch": 0.182130130658572, + "grad_norm": 116.7082290649414, + "learning_rate": 7.278965426233834e-07, + "loss": 0.2556, + "num_input_tokens_seen": 676416, + "step": 1380 + }, + { + "epoch": 0.18279002243632045, + "grad_norm": 230.38856506347656, + "learning_rate": 7.305357614146212e-07, + "loss": 0.1058, + "num_input_tokens_seen": 679040, + "step": 1385 + }, + { + "epoch": 0.1834499142140689, + "grad_norm": 18.40300178527832, + "learning_rate": 7.33174980205859e-07, + "loss": 0.188, + "num_input_tokens_seen": 681920, + "step": 1390 + }, + { + "epoch": 0.18410980599181734, + "grad_norm": 0.1622646301984787, + "learning_rate": 7.358141989970969e-07, + "loss": 0.0583, + "num_input_tokens_seen": 684416, + "step": 1395 + }, + { + "epoch": 0.18476969776956578, + "grad_norm": 0.5164310336112976, + "learning_rate": 7.384534177883346e-07, + "loss": 0.0136, + "num_input_tokens_seen": 686976, + "step": 1400 + }, + { + "epoch": 0.18542958954731423, + "grad_norm": 0.15728312730789185, + "learning_rate": 7.410926365795724e-07, + "loss": 0.1873, + "num_input_tokens_seen": 689408, + "step": 1405 + }, + { + "epoch": 0.1860894813250627, + "grad_norm": 0.05055977404117584, + "learning_rate": 7.437318553708102e-07, + "loss": 0.0987, + "num_input_tokens_seen": 691712, + "step": 1410 + }, + { + "epoch": 0.18674937310281114, + "grad_norm": 0.0684177577495575, + "learning_rate": 7.463710741620481e-07, + "loss": 0.1017, + "num_input_tokens_seen": 694080, + "step": 1415 + }, + { + "epoch": 0.1874092648805596, + "grad_norm": 102.82398986816406, + "learning_rate": 7.490102929532857e-07, + "loss": 0.1519, + "num_input_tokens_seen": 696640, + "step": 1420 + }, + { + "epoch": 0.18806915665830803, + "grad_norm": 0.09111729264259338, + "learning_rate": 7.516495117445236e-07, + "loss": 0.0091, + "num_input_tokens_seen": 699136, + "step": 1425 + }, + { + "epoch": 0.18872904843605648, + "grad_norm": 26.446638107299805, + "learning_rate": 7.542887305357614e-07, + "loss": 0.3917, + "num_input_tokens_seen": 701696, + "step": 1430 + }, + { + "epoch": 0.18938894021380492, + "grad_norm": 66.99994659423828, + "learning_rate": 7.569279493269993e-07, + "loss": 0.206, + "num_input_tokens_seen": 704384, + "step": 1435 + }, + { + "epoch": 0.1900488319915534, + "grad_norm": 2.427685022354126, + "learning_rate": 7.595671681182369e-07, + "loss": 0.0719, + "num_input_tokens_seen": 706560, + "step": 1440 + }, + { + "epoch": 0.19070872376930184, + "grad_norm": 1.8941874504089355, + "learning_rate": 7.622063869094748e-07, + "loss": 0.0804, + "num_input_tokens_seen": 709248, + "step": 1445 + }, + { + "epoch": 0.19136861554705029, + "grad_norm": 45.50901794433594, + "learning_rate": 7.648456057007126e-07, + "loss": 0.1375, + "num_input_tokens_seen": 711296, + "step": 1450 + }, + { + "epoch": 0.19202850732479873, + "grad_norm": 0.24863462150096893, + "learning_rate": 7.674848244919505e-07, + "loss": 0.0051, + "num_input_tokens_seen": 713728, + "step": 1455 + }, + { + "epoch": 0.19268839910254718, + "grad_norm": 2.862823009490967, + "learning_rate": 7.701240432831881e-07, + "loss": 0.0025, + "num_input_tokens_seen": 716224, + "step": 1460 + }, + { + "epoch": 0.19334829088029562, + "grad_norm": 52.276092529296875, + "learning_rate": 7.727632620744259e-07, + "loss": 0.3884, + "num_input_tokens_seen": 718528, + "step": 1465 + }, + { + "epoch": 0.1940081826580441, + "grad_norm": 0.3433469533920288, + "learning_rate": 7.754024808656638e-07, + "loss": 0.2433, + "num_input_tokens_seen": 720960, + "step": 1470 + }, + { + "epoch": 0.19466807443579254, + "grad_norm": 0.39280804991722107, + "learning_rate": 7.780416996569014e-07, + "loss": 0.196, + "num_input_tokens_seen": 723200, + "step": 1475 + }, + { + "epoch": 0.19532796621354098, + "grad_norm": 87.35384368896484, + "learning_rate": 7.806809184481393e-07, + "loss": 0.1681, + "num_input_tokens_seen": 725888, + "step": 1480 + }, + { + "epoch": 0.19598785799128943, + "grad_norm": 0.5325558185577393, + "learning_rate": 7.833201372393771e-07, + "loss": 0.0609, + "num_input_tokens_seen": 728256, + "step": 1485 + }, + { + "epoch": 0.19664774976903787, + "grad_norm": 145.96885681152344, + "learning_rate": 7.859593560306149e-07, + "loss": 0.1787, + "num_input_tokens_seen": 730688, + "step": 1490 + }, + { + "epoch": 0.19730764154678632, + "grad_norm": 2.3841898441314697, + "learning_rate": 7.885985748218526e-07, + "loss": 0.2156, + "num_input_tokens_seen": 732992, + "step": 1495 + }, + { + "epoch": 0.1979675333245348, + "grad_norm": 0.35310041904449463, + "learning_rate": 7.912377936130905e-07, + "loss": 0.0585, + "num_input_tokens_seen": 735424, + "step": 1500 + }, + { + "epoch": 0.19862742510228323, + "grad_norm": 125.40451049804688, + "learning_rate": 7.938770124043283e-07, + "loss": 0.1832, + "num_input_tokens_seen": 737664, + "step": 1505 + }, + { + "epoch": 0.19928731688003168, + "grad_norm": 130.02059936523438, + "learning_rate": 7.965162311955661e-07, + "loss": 0.2169, + "num_input_tokens_seen": 740096, + "step": 1510 + }, + { + "epoch": 0.19994720865778012, + "grad_norm": 12.761601448059082, + "learning_rate": 7.991554499868038e-07, + "loss": 0.1121, + "num_input_tokens_seen": 742912, + "step": 1515 + }, + { + "epoch": 0.20060710043552857, + "grad_norm": 1.22785222530365, + "learning_rate": 8.017946687780417e-07, + "loss": 0.1211, + "num_input_tokens_seen": 745536, + "step": 1520 + }, + { + "epoch": 0.201266992213277, + "grad_norm": 0.4311539828777313, + "learning_rate": 8.044338875692794e-07, + "loss": 0.0182, + "num_input_tokens_seen": 747968, + "step": 1525 + }, + { + "epoch": 0.20192688399102549, + "grad_norm": 13.155280113220215, + "learning_rate": 8.070731063605173e-07, + "loss": 0.1159, + "num_input_tokens_seen": 750464, + "step": 1530 + }, + { + "epoch": 0.20258677576877393, + "grad_norm": 0.16525061428546906, + "learning_rate": 8.09712325151755e-07, + "loss": 0.1005, + "num_input_tokens_seen": 752896, + "step": 1535 + }, + { + "epoch": 0.20324666754652237, + "grad_norm": 116.2685317993164, + "learning_rate": 8.123515439429929e-07, + "loss": 0.2272, + "num_input_tokens_seen": 755648, + "step": 1540 + }, + { + "epoch": 0.20390655932427082, + "grad_norm": 25.550724029541016, + "learning_rate": 8.149907627342306e-07, + "loss": 0.2213, + "num_input_tokens_seen": 758144, + "step": 1545 + }, + { + "epoch": 0.20456645110201926, + "grad_norm": 4.4988884925842285, + "learning_rate": 8.176299815254685e-07, + "loss": 0.0083, + "num_input_tokens_seen": 760704, + "step": 1550 + }, + { + "epoch": 0.2052263428797677, + "grad_norm": 0.9621325135231018, + "learning_rate": 8.202692003167062e-07, + "loss": 0.0023, + "num_input_tokens_seen": 763136, + "step": 1555 + }, + { + "epoch": 0.20588623465751615, + "grad_norm": 1.8329963684082031, + "learning_rate": 8.229084191079441e-07, + "loss": 0.1036, + "num_input_tokens_seen": 766016, + "step": 1560 + }, + { + "epoch": 0.20654612643526463, + "grad_norm": 56.67019271850586, + "learning_rate": 8.255476378991818e-07, + "loss": 0.3823, + "num_input_tokens_seen": 768512, + "step": 1565 + }, + { + "epoch": 0.20720601821301307, + "grad_norm": 0.3594275712966919, + "learning_rate": 8.281868566904196e-07, + "loss": 0.2189, + "num_input_tokens_seen": 770816, + "step": 1570 + }, + { + "epoch": 0.20786590999076152, + "grad_norm": 361.115966796875, + "learning_rate": 8.308260754816574e-07, + "loss": 0.2545, + "num_input_tokens_seen": 773312, + "step": 1575 + }, + { + "epoch": 0.20852580176850996, + "grad_norm": 65.93611145019531, + "learning_rate": 8.334652942728951e-07, + "loss": 0.043, + "num_input_tokens_seen": 775680, + "step": 1580 + }, + { + "epoch": 0.2091856935462584, + "grad_norm": 20.88169288635254, + "learning_rate": 8.36104513064133e-07, + "loss": 0.1056, + "num_input_tokens_seen": 778048, + "step": 1585 + }, + { + "epoch": 0.20984558532400685, + "grad_norm": 0.9779016971588135, + "learning_rate": 8.387437318553708e-07, + "loss": 0.2807, + "num_input_tokens_seen": 780672, + "step": 1590 + }, + { + "epoch": 0.21050547710175532, + "grad_norm": 9.102095603942871, + "learning_rate": 8.413829506466085e-07, + "loss": 0.4118, + "num_input_tokens_seen": 783360, + "step": 1595 + }, + { + "epoch": 0.21116536887950377, + "grad_norm": 83.82833099365234, + "learning_rate": 8.440221694378463e-07, + "loss": 0.1426, + "num_input_tokens_seen": 785856, + "step": 1600 + }, + { + "epoch": 0.2118252606572522, + "grad_norm": 2.142838716506958, + "learning_rate": 8.466613882290842e-07, + "loss": 0.1497, + "num_input_tokens_seen": 788480, + "step": 1605 + }, + { + "epoch": 0.21248515243500066, + "grad_norm": 0.20364223420619965, + "learning_rate": 8.49300607020322e-07, + "loss": 0.1096, + "num_input_tokens_seen": 791040, + "step": 1610 + }, + { + "epoch": 0.2131450442127491, + "grad_norm": 0.12941564619541168, + "learning_rate": 8.519398258115597e-07, + "loss": 0.0508, + "num_input_tokens_seen": 793600, + "step": 1615 + }, + { + "epoch": 0.21380493599049755, + "grad_norm": 0.15976838767528534, + "learning_rate": 8.545790446027975e-07, + "loss": 0.2171, + "num_input_tokens_seen": 795968, + "step": 1620 + }, + { + "epoch": 0.21446482776824602, + "grad_norm": 0.6009201407432556, + "learning_rate": 8.572182633940354e-07, + "loss": 0.0251, + "num_input_tokens_seen": 798272, + "step": 1625 + }, + { + "epoch": 0.21512471954599446, + "grad_norm": 0.09053964912891388, + "learning_rate": 8.59857482185273e-07, + "loss": 0.1544, + "num_input_tokens_seen": 801024, + "step": 1630 + }, + { + "epoch": 0.2157846113237429, + "grad_norm": 39.99749755859375, + "learning_rate": 8.624967009765109e-07, + "loss": 0.1345, + "num_input_tokens_seen": 803584, + "step": 1635 + }, + { + "epoch": 0.21644450310149135, + "grad_norm": 3.849316120147705, + "learning_rate": 8.651359197677487e-07, + "loss": 0.0544, + "num_input_tokens_seen": 805952, + "step": 1640 + }, + { + "epoch": 0.2171043948792398, + "grad_norm": 0.24410617351531982, + "learning_rate": 8.677751385589866e-07, + "loss": 0.0881, + "num_input_tokens_seen": 808512, + "step": 1645 + }, + { + "epoch": 0.21776428665698824, + "grad_norm": 5.100990295410156, + "learning_rate": 8.704143573502242e-07, + "loss": 0.0636, + "num_input_tokens_seen": 811136, + "step": 1650 + }, + { + "epoch": 0.21842417843473672, + "grad_norm": 0.057112302631139755, + "learning_rate": 8.730535761414621e-07, + "loss": 0.0526, + "num_input_tokens_seen": 813376, + "step": 1655 + }, + { + "epoch": 0.21908407021248516, + "grad_norm": 130.31956481933594, + "learning_rate": 8.756927949326999e-07, + "loss": 0.2304, + "num_input_tokens_seen": 815872, + "step": 1660 + }, + { + "epoch": 0.2197439619902336, + "grad_norm": 0.1521269828081131, + "learning_rate": 8.783320137239377e-07, + "loss": 0.0414, + "num_input_tokens_seen": 818240, + "step": 1665 + }, + { + "epoch": 0.22040385376798205, + "grad_norm": 0.08848614990711212, + "learning_rate": 8.809712325151754e-07, + "loss": 0.108, + "num_input_tokens_seen": 820736, + "step": 1670 + }, + { + "epoch": 0.2210637455457305, + "grad_norm": 177.2266082763672, + "learning_rate": 8.836104513064132e-07, + "loss": 0.3837, + "num_input_tokens_seen": 823616, + "step": 1675 + }, + { + "epoch": 0.22172363732347894, + "grad_norm": 20.548864364624023, + "learning_rate": 8.862496700976511e-07, + "loss": 0.1827, + "num_input_tokens_seen": 826112, + "step": 1680 + }, + { + "epoch": 0.2223835291012274, + "grad_norm": 0.21072465181350708, + "learning_rate": 8.888888888888888e-07, + "loss": 0.1026, + "num_input_tokens_seen": 828672, + "step": 1685 + }, + { + "epoch": 0.22304342087897586, + "grad_norm": 0.858121931552887, + "learning_rate": 8.915281076801266e-07, + "loss": 0.1787, + "num_input_tokens_seen": 831296, + "step": 1690 + }, + { + "epoch": 0.2237033126567243, + "grad_norm": 50.27134704589844, + "learning_rate": 8.941673264713644e-07, + "loss": 0.0912, + "num_input_tokens_seen": 833856, + "step": 1695 + }, + { + "epoch": 0.22436320443447275, + "grad_norm": 0.13557344675064087, + "learning_rate": 8.968065452626022e-07, + "loss": 0.2237, + "num_input_tokens_seen": 836480, + "step": 1700 + }, + { + "epoch": 0.2250230962122212, + "grad_norm": 90.39064025878906, + "learning_rate": 8.9944576405384e-07, + "loss": 0.0627, + "num_input_tokens_seen": 838976, + "step": 1705 + }, + { + "epoch": 0.22568298798996964, + "grad_norm": 0.13472281396389008, + "learning_rate": 9.020849828450778e-07, + "loss": 0.1252, + "num_input_tokens_seen": 841728, + "step": 1710 + }, + { + "epoch": 0.22634287976771808, + "grad_norm": 0.06779789924621582, + "learning_rate": 9.047242016363156e-07, + "loss": 0.0499, + "num_input_tokens_seen": 843968, + "step": 1715 + }, + { + "epoch": 0.22700277154546655, + "grad_norm": 0.08471981436014175, + "learning_rate": 9.073634204275534e-07, + "loss": 0.4636, + "num_input_tokens_seen": 846464, + "step": 1720 + }, + { + "epoch": 0.227662663323215, + "grad_norm": 0.1624394655227661, + "learning_rate": 9.100026392187912e-07, + "loss": 0.2002, + "num_input_tokens_seen": 849088, + "step": 1725 + }, + { + "epoch": 0.22832255510096344, + "grad_norm": 0.18447798490524292, + "learning_rate": 9.12641858010029e-07, + "loss": 0.0052, + "num_input_tokens_seen": 851712, + "step": 1730 + }, + { + "epoch": 0.2289824468787119, + "grad_norm": 0.32411372661590576, + "learning_rate": 9.152810768012667e-07, + "loss": 0.156, + "num_input_tokens_seen": 854208, + "step": 1735 + }, + { + "epoch": 0.22964233865646033, + "grad_norm": 0.3888205885887146, + "learning_rate": 9.179202955925046e-07, + "loss": 0.0557, + "num_input_tokens_seen": 856576, + "step": 1740 + }, + { + "epoch": 0.23030223043420878, + "grad_norm": 0.337443083524704, + "learning_rate": 9.205595143837424e-07, + "loss": 0.2487, + "num_input_tokens_seen": 859008, + "step": 1745 + }, + { + "epoch": 0.23096212221195725, + "grad_norm": 20.267051696777344, + "learning_rate": 9.231987331749802e-07, + "loss": 0.1679, + "num_input_tokens_seen": 861440, + "step": 1750 + }, + { + "epoch": 0.2316220139897057, + "grad_norm": 86.99280548095703, + "learning_rate": 9.258379519662179e-07, + "loss": 0.1502, + "num_input_tokens_seen": 863936, + "step": 1755 + }, + { + "epoch": 0.23228190576745414, + "grad_norm": 79.0941390991211, + "learning_rate": 9.284771707574558e-07, + "loss": 0.1127, + "num_input_tokens_seen": 866176, + "step": 1760 + }, + { + "epoch": 0.23294179754520258, + "grad_norm": 0.8542360067367554, + "learning_rate": 9.311163895486936e-07, + "loss": 0.1574, + "num_input_tokens_seen": 868480, + "step": 1765 + }, + { + "epoch": 0.23360168932295103, + "grad_norm": 0.31946781277656555, + "learning_rate": 9.337556083399313e-07, + "loss": 0.1025, + "num_input_tokens_seen": 870976, + "step": 1770 + }, + { + "epoch": 0.23426158110069947, + "grad_norm": 2.6998465061187744, + "learning_rate": 9.363948271311691e-07, + "loss": 0.2237, + "num_input_tokens_seen": 873088, + "step": 1775 + }, + { + "epoch": 0.23492147287844795, + "grad_norm": 25.20713996887207, + "learning_rate": 9.390340459224069e-07, + "loss": 0.2408, + "num_input_tokens_seen": 875520, + "step": 1780 + }, + { + "epoch": 0.2355813646561964, + "grad_norm": 16.12818145751953, + "learning_rate": 9.416732647136448e-07, + "loss": 0.0166, + "num_input_tokens_seen": 877632, + "step": 1785 + }, + { + "epoch": 0.23624125643394484, + "grad_norm": 27.490631103515625, + "learning_rate": 9.443124835048824e-07, + "loss": 0.0702, + "num_input_tokens_seen": 880000, + "step": 1790 + }, + { + "epoch": 0.23690114821169328, + "grad_norm": 236.32696533203125, + "learning_rate": 9.469517022961203e-07, + "loss": 0.0669, + "num_input_tokens_seen": 882176, + "step": 1795 + }, + { + "epoch": 0.23756103998944172, + "grad_norm": 0.07463784515857697, + "learning_rate": 9.495909210873581e-07, + "loss": 0.0867, + "num_input_tokens_seen": 884800, + "step": 1800 + }, + { + "epoch": 0.23822093176719017, + "grad_norm": 58.357872009277344, + "learning_rate": 9.522301398785959e-07, + "loss": 0.0113, + "num_input_tokens_seen": 887104, + "step": 1805 + }, + { + "epoch": 0.23888082354493864, + "grad_norm": 154.2604217529297, + "learning_rate": 9.548693586698336e-07, + "loss": 0.043, + "num_input_tokens_seen": 889408, + "step": 1810 + }, + { + "epoch": 0.2395407153226871, + "grad_norm": 452.2360534667969, + "learning_rate": 9.575085774610714e-07, + "loss": 0.2031, + "num_input_tokens_seen": 891648, + "step": 1815 + }, + { + "epoch": 0.24020060710043553, + "grad_norm": 23.80782127380371, + "learning_rate": 9.601477962523092e-07, + "loss": 0.2171, + "num_input_tokens_seen": 894208, + "step": 1820 + }, + { + "epoch": 0.24086049887818398, + "grad_norm": 23.87192726135254, + "learning_rate": 9.627870150435472e-07, + "loss": 0.1157, + "num_input_tokens_seen": 896704, + "step": 1825 + }, + { + "epoch": 0.24152039065593242, + "grad_norm": 0.8979841470718384, + "learning_rate": 9.65426233834785e-07, + "loss": 0.045, + "num_input_tokens_seen": 899264, + "step": 1830 + }, + { + "epoch": 0.24218028243368087, + "grad_norm": 0.11097682267427444, + "learning_rate": 9.680654526260227e-07, + "loss": 0.0719, + "num_input_tokens_seen": 901760, + "step": 1835 + }, + { + "epoch": 0.24284017421142934, + "grad_norm": 313.09429931640625, + "learning_rate": 9.707046714172605e-07, + "loss": 0.1597, + "num_input_tokens_seen": 903872, + "step": 1840 + }, + { + "epoch": 0.24350006598917778, + "grad_norm": 0.03155740723013878, + "learning_rate": 9.733438902084983e-07, + "loss": 0.0005, + "num_input_tokens_seen": 906368, + "step": 1845 + }, + { + "epoch": 0.24415995776692623, + "grad_norm": 0.024505728855729103, + "learning_rate": 9.75983108999736e-07, + "loss": 0.0012, + "num_input_tokens_seen": 908864, + "step": 1850 + }, + { + "epoch": 0.24481984954467467, + "grad_norm": 0.07788019627332687, + "learning_rate": 9.786223277909738e-07, + "loss": 0.169, + "num_input_tokens_seen": 911040, + "step": 1855 + }, + { + "epoch": 0.24547974132242312, + "grad_norm": 32.930335998535156, + "learning_rate": 9.812615465822116e-07, + "loss": 0.2041, + "num_input_tokens_seen": 913408, + "step": 1860 + }, + { + "epoch": 0.24613963310017156, + "grad_norm": 24.17268180847168, + "learning_rate": 9.839007653734496e-07, + "loss": 0.4034, + "num_input_tokens_seen": 915968, + "step": 1865 + }, + { + "epoch": 0.24679952487792003, + "grad_norm": 106.91326904296875, + "learning_rate": 9.865399841646871e-07, + "loss": 0.1269, + "num_input_tokens_seen": 918528, + "step": 1870 + }, + { + "epoch": 0.24745941665566848, + "grad_norm": 0.11772750318050385, + "learning_rate": 9.89179202955925e-07, + "loss": 0.0006, + "num_input_tokens_seen": 921152, + "step": 1875 + }, + { + "epoch": 0.24811930843341692, + "grad_norm": 0.17723694443702698, + "learning_rate": 9.918184217471629e-07, + "loss": 0.0005, + "num_input_tokens_seen": 923520, + "step": 1880 + }, + { + "epoch": 0.24877920021116537, + "grad_norm": 0.6994357109069824, + "learning_rate": 9.944576405384004e-07, + "loss": 0.1003, + "num_input_tokens_seen": 925888, + "step": 1885 + }, + { + "epoch": 0.2494390919889138, + "grad_norm": 21.57866096496582, + "learning_rate": 9.970968593296384e-07, + "loss": 0.1144, + "num_input_tokens_seen": 928704, + "step": 1890 + }, + { + "epoch": 0.2500989837666623, + "grad_norm": 130.3249053955078, + "learning_rate": 9.997360781208762e-07, + "loss": 0.4074, + "num_input_tokens_seen": 930944, + "step": 1895 + }, + { + "epoch": 0.2500989837666623, + "eval_loss": 0.15521390736103058, + "eval_runtime": 7.8747, + "eval_samples_per_second": 855.274, + "eval_steps_per_second": 106.925, + "num_input_tokens_seen": 930944, + "step": 1895 + }, + { + "epoch": 0.25075887554441073, + "grad_norm": 397.0398254394531, + "learning_rate": 1.002375296912114e-06, + "loss": 0.2799, + "num_input_tokens_seen": 933376, + "step": 1900 + }, + { + "epoch": 0.2514187673221592, + "grad_norm": 0.684500515460968, + "learning_rate": 1.0050145157033517e-06, + "loss": 0.151, + "num_input_tokens_seen": 936000, + "step": 1905 + }, + { + "epoch": 0.2520786590999076, + "grad_norm": 65.4966049194336, + "learning_rate": 1.0076537344945895e-06, + "loss": 0.226, + "num_input_tokens_seen": 938432, + "step": 1910 + }, + { + "epoch": 0.25273855087765607, + "grad_norm": 58.49400329589844, + "learning_rate": 1.0102929532858273e-06, + "loss": 0.1408, + "num_input_tokens_seen": 941312, + "step": 1915 + }, + { + "epoch": 0.2533984426554045, + "grad_norm": 59.23992919921875, + "learning_rate": 1.012932172077065e-06, + "loss": 0.0428, + "num_input_tokens_seen": 943488, + "step": 1920 + }, + { + "epoch": 0.25405833443315295, + "grad_norm": 57.335479736328125, + "learning_rate": 1.015571390868303e-06, + "loss": 0.1021, + "num_input_tokens_seen": 945856, + "step": 1925 + }, + { + "epoch": 0.2547182262109014, + "grad_norm": 10.350142478942871, + "learning_rate": 1.0182106096595406e-06, + "loss": 0.1363, + "num_input_tokens_seen": 948352, + "step": 1930 + }, + { + "epoch": 0.25537811798864984, + "grad_norm": 0.7066463828086853, + "learning_rate": 1.0208498284507786e-06, + "loss": 0.0973, + "num_input_tokens_seen": 950976, + "step": 1935 + }, + { + "epoch": 0.2560380097663983, + "grad_norm": 316.34130859375, + "learning_rate": 1.0234890472420164e-06, + "loss": 0.1607, + "num_input_tokens_seen": 953216, + "step": 1940 + }, + { + "epoch": 0.25669790154414673, + "grad_norm": 2.6355180740356445, + "learning_rate": 1.0261282660332541e-06, + "loss": 0.015, + "num_input_tokens_seen": 955648, + "step": 1945 + }, + { + "epoch": 0.25735779332189523, + "grad_norm": 0.5929775238037109, + "learning_rate": 1.028767484824492e-06, + "loss": 0.0037, + "num_input_tokens_seen": 957888, + "step": 1950 + }, + { + "epoch": 0.2580176850996437, + "grad_norm": 29.690797805786133, + "learning_rate": 1.0314067036157297e-06, + "loss": 0.1865, + "num_input_tokens_seen": 960128, + "step": 1955 + }, + { + "epoch": 0.2586775768773921, + "grad_norm": 0.16472673416137695, + "learning_rate": 1.0340459224069675e-06, + "loss": 0.0348, + "num_input_tokens_seen": 962496, + "step": 1960 + }, + { + "epoch": 0.25933746865514057, + "grad_norm": 22.16752052307129, + "learning_rate": 1.0366851411982054e-06, + "loss": 0.3992, + "num_input_tokens_seen": 965120, + "step": 1965 + }, + { + "epoch": 0.259997360432889, + "grad_norm": 0.6555209159851074, + "learning_rate": 1.039324359989443e-06, + "loss": 0.007, + "num_input_tokens_seen": 967616, + "step": 1970 + }, + { + "epoch": 0.26065725221063746, + "grad_norm": 0.059777699410915375, + "learning_rate": 1.0419635787806808e-06, + "loss": 0.1056, + "num_input_tokens_seen": 970240, + "step": 1975 + }, + { + "epoch": 0.2613171439883859, + "grad_norm": 20.016475677490234, + "learning_rate": 1.0446027975719188e-06, + "loss": 0.2734, + "num_input_tokens_seen": 972544, + "step": 1980 + }, + { + "epoch": 0.26197703576613435, + "grad_norm": 0.36860647797584534, + "learning_rate": 1.0472420163631565e-06, + "loss": 0.2381, + "num_input_tokens_seen": 974912, + "step": 1985 + }, + { + "epoch": 0.2626369275438828, + "grad_norm": 20.66069793701172, + "learning_rate": 1.049881235154394e-06, + "loss": 0.4659, + "num_input_tokens_seen": 977088, + "step": 1990 + }, + { + "epoch": 0.26329681932163124, + "grad_norm": 27.569839477539062, + "learning_rate": 1.052520453945632e-06, + "loss": 0.241, + "num_input_tokens_seen": 979648, + "step": 1995 + }, + { + "epoch": 0.2639567110993797, + "grad_norm": 25.123470306396484, + "learning_rate": 1.0551596727368699e-06, + "loss": 0.0961, + "num_input_tokens_seen": 982336, + "step": 2000 + }, + { + "epoch": 0.2646166028771281, + "grad_norm": 10.748541831970215, + "learning_rate": 1.0577988915281074e-06, + "loss": 0.0121, + "num_input_tokens_seen": 984768, + "step": 2005 + }, + { + "epoch": 0.2652764946548766, + "grad_norm": 0.4387301206588745, + "learning_rate": 1.0604381103193454e-06, + "loss": 0.0467, + "num_input_tokens_seen": 987136, + "step": 2010 + }, + { + "epoch": 0.26593638643262507, + "grad_norm": 0.04643954709172249, + "learning_rate": 1.0630773291105832e-06, + "loss": 0.142, + "num_input_tokens_seen": 989824, + "step": 2015 + }, + { + "epoch": 0.2665962782103735, + "grad_norm": 24.511646270751953, + "learning_rate": 1.0657165479018212e-06, + "loss": 0.2781, + "num_input_tokens_seen": 992064, + "step": 2020 + }, + { + "epoch": 0.26725616998812196, + "grad_norm": 35.56803512573242, + "learning_rate": 1.0683557666930587e-06, + "loss": 0.2715, + "num_input_tokens_seen": 994368, + "step": 2025 + }, + { + "epoch": 0.2679160617658704, + "grad_norm": 19.118850708007812, + "learning_rate": 1.0709949854842965e-06, + "loss": 0.1645, + "num_input_tokens_seen": 996864, + "step": 2030 + }, + { + "epoch": 0.26857595354361885, + "grad_norm": 0.19591161608695984, + "learning_rate": 1.0736342042755345e-06, + "loss": 0.1714, + "num_input_tokens_seen": 999360, + "step": 2035 + }, + { + "epoch": 0.2692358453213673, + "grad_norm": 0.8063420653343201, + "learning_rate": 1.0762734230667723e-06, + "loss": 0.0919, + "num_input_tokens_seen": 1001920, + "step": 2040 + }, + { + "epoch": 0.26989573709911574, + "grad_norm": 0.3764183819293976, + "learning_rate": 1.0789126418580098e-06, + "loss": 0.0063, + "num_input_tokens_seen": 1004224, + "step": 2045 + }, + { + "epoch": 0.2705556288768642, + "grad_norm": 0.18820199370384216, + "learning_rate": 1.0815518606492478e-06, + "loss": 0.0012, + "num_input_tokens_seen": 1006528, + "step": 2050 + }, + { + "epoch": 0.27121552065461263, + "grad_norm": 1.6984174251556396, + "learning_rate": 1.0841910794404856e-06, + "loss": 0.2114, + "num_input_tokens_seen": 1008896, + "step": 2055 + }, + { + "epoch": 0.2718754124323611, + "grad_norm": 0.23580117523670197, + "learning_rate": 1.0868302982317234e-06, + "loss": 0.1478, + "num_input_tokens_seen": 1011648, + "step": 2060 + }, + { + "epoch": 0.2725353042101095, + "grad_norm": 33.00039291381836, + "learning_rate": 1.0894695170229611e-06, + "loss": 0.0509, + "num_input_tokens_seen": 1014208, + "step": 2065 + }, + { + "epoch": 0.27319519598785796, + "grad_norm": 0.04923771321773529, + "learning_rate": 1.092108735814199e-06, + "loss": 0.105, + "num_input_tokens_seen": 1016640, + "step": 2070 + }, + { + "epoch": 0.27385508776560646, + "grad_norm": 0.052733778953552246, + "learning_rate": 1.0947479546054369e-06, + "loss": 0.0663, + "num_input_tokens_seen": 1019328, + "step": 2075 + }, + { + "epoch": 0.2745149795433549, + "grad_norm": 0.047354552894830704, + "learning_rate": 1.0973871733966747e-06, + "loss": 0.0865, + "num_input_tokens_seen": 1021696, + "step": 2080 + }, + { + "epoch": 0.27517487132110335, + "grad_norm": 97.71429443359375, + "learning_rate": 1.1000263921879122e-06, + "loss": 0.2014, + "num_input_tokens_seen": 1024256, + "step": 2085 + }, + { + "epoch": 0.2758347630988518, + "grad_norm": 33.12277603149414, + "learning_rate": 1.1026656109791502e-06, + "loss": 0.1935, + "num_input_tokens_seen": 1026560, + "step": 2090 + }, + { + "epoch": 0.27649465487660024, + "grad_norm": 29.502172470092773, + "learning_rate": 1.105304829770388e-06, + "loss": 0.2346, + "num_input_tokens_seen": 1029184, + "step": 2095 + }, + { + "epoch": 0.2771545466543487, + "grad_norm": 10.247140884399414, + "learning_rate": 1.1079440485616255e-06, + "loss": 0.0947, + "num_input_tokens_seen": 1031744, + "step": 2100 + }, + { + "epoch": 0.27781443843209713, + "grad_norm": 1.695241093635559, + "learning_rate": 1.1105832673528635e-06, + "loss": 0.1724, + "num_input_tokens_seen": 1034048, + "step": 2105 + }, + { + "epoch": 0.2784743302098456, + "grad_norm": 0.18251433968544006, + "learning_rate": 1.1132224861441013e-06, + "loss": 0.0564, + "num_input_tokens_seen": 1036544, + "step": 2110 + }, + { + "epoch": 0.279134221987594, + "grad_norm": 0.03173065558075905, + "learning_rate": 1.115861704935339e-06, + "loss": 0.2777, + "num_input_tokens_seen": 1038848, + "step": 2115 + }, + { + "epoch": 0.27979411376534247, + "grad_norm": 0.03673817217350006, + "learning_rate": 1.1185009237265768e-06, + "loss": 0.1166, + "num_input_tokens_seen": 1041152, + "step": 2120 + }, + { + "epoch": 0.2804540055430909, + "grad_norm": 18.983776092529297, + "learning_rate": 1.1211401425178146e-06, + "loss": 0.1476, + "num_input_tokens_seen": 1043968, + "step": 2125 + }, + { + "epoch": 0.28111389732083936, + "grad_norm": 107.69248962402344, + "learning_rate": 1.1237793613090524e-06, + "loss": 0.2938, + "num_input_tokens_seen": 1046272, + "step": 2130 + }, + { + "epoch": 0.28177378909858786, + "grad_norm": 0.15629757940769196, + "learning_rate": 1.1264185801002904e-06, + "loss": 0.1254, + "num_input_tokens_seen": 1048448, + "step": 2135 + }, + { + "epoch": 0.2824336808763363, + "grad_norm": 36.44291687011719, + "learning_rate": 1.129057798891528e-06, + "loss": 0.2135, + "num_input_tokens_seen": 1051072, + "step": 2140 + }, + { + "epoch": 0.28309357265408475, + "grad_norm": 0.4880758225917816, + "learning_rate": 1.131697017682766e-06, + "loss": 0.0968, + "num_input_tokens_seen": 1053312, + "step": 2145 + }, + { + "epoch": 0.2837534644318332, + "grad_norm": 0.0785878598690033, + "learning_rate": 1.1343362364740037e-06, + "loss": 0.1169, + "num_input_tokens_seen": 1055680, + "step": 2150 + }, + { + "epoch": 0.28441335620958164, + "grad_norm": 0.1919853389263153, + "learning_rate": 1.1369754552652415e-06, + "loss": 0.2016, + "num_input_tokens_seen": 1057984, + "step": 2155 + }, + { + "epoch": 0.2850732479873301, + "grad_norm": 0.431120365858078, + "learning_rate": 1.1396146740564792e-06, + "loss": 0.1111, + "num_input_tokens_seen": 1060736, + "step": 2160 + }, + { + "epoch": 0.2857331397650785, + "grad_norm": 1.5402590036392212, + "learning_rate": 1.142253892847717e-06, + "loss": 0.1279, + "num_input_tokens_seen": 1063424, + "step": 2165 + }, + { + "epoch": 0.28639303154282697, + "grad_norm": 0.02886631153523922, + "learning_rate": 1.1448931116389548e-06, + "loss": 0.0038, + "num_input_tokens_seen": 1065728, + "step": 2170 + }, + { + "epoch": 0.2870529233205754, + "grad_norm": 0.049261245876550674, + "learning_rate": 1.1475323304301928e-06, + "loss": 0.0007, + "num_input_tokens_seen": 1068160, + "step": 2175 + }, + { + "epoch": 0.28771281509832386, + "grad_norm": 0.04944710433483124, + "learning_rate": 1.1501715492214303e-06, + "loss": 0.2872, + "num_input_tokens_seen": 1070592, + "step": 2180 + }, + { + "epoch": 0.2883727068760723, + "grad_norm": 19.942960739135742, + "learning_rate": 1.1528107680126681e-06, + "loss": 0.0978, + "num_input_tokens_seen": 1073280, + "step": 2185 + }, + { + "epoch": 0.28903259865382075, + "grad_norm": 1.5230175256729126, + "learning_rate": 1.155449986803906e-06, + "loss": 0.1237, + "num_input_tokens_seen": 1075584, + "step": 2190 + }, + { + "epoch": 0.28969249043156925, + "grad_norm": 115.89716339111328, + "learning_rate": 1.1580892055951439e-06, + "loss": 0.1684, + "num_input_tokens_seen": 1078016, + "step": 2195 + }, + { + "epoch": 0.2903523822093177, + "grad_norm": 1.9137988090515137, + "learning_rate": 1.1607284243863814e-06, + "loss": 0.1865, + "num_input_tokens_seen": 1080512, + "step": 2200 + }, + { + "epoch": 0.29101227398706614, + "grad_norm": 18.28475570678711, + "learning_rate": 1.1633676431776194e-06, + "loss": 0.169, + "num_input_tokens_seen": 1082752, + "step": 2205 + }, + { + "epoch": 0.2916721657648146, + "grad_norm": 27.2545166015625, + "learning_rate": 1.1660068619688572e-06, + "loss": 0.3035, + "num_input_tokens_seen": 1085184, + "step": 2210 + }, + { + "epoch": 0.29233205754256303, + "grad_norm": 49.74034118652344, + "learning_rate": 1.1686460807600947e-06, + "loss": 0.212, + "num_input_tokens_seen": 1087360, + "step": 2215 + }, + { + "epoch": 0.2929919493203115, + "grad_norm": 35.755130767822266, + "learning_rate": 1.1712852995513327e-06, + "loss": 0.0484, + "num_input_tokens_seen": 1089984, + "step": 2220 + }, + { + "epoch": 0.2936518410980599, + "grad_norm": 0.13340047001838684, + "learning_rate": 1.1739245183425705e-06, + "loss": 0.0607, + "num_input_tokens_seen": 1092608, + "step": 2225 + }, + { + "epoch": 0.29431173287580836, + "grad_norm": 0.050269801169633865, + "learning_rate": 1.1765637371338085e-06, + "loss": 0.101, + "num_input_tokens_seen": 1095040, + "step": 2230 + }, + { + "epoch": 0.2949716246535568, + "grad_norm": 16.144840240478516, + "learning_rate": 1.179202955925046e-06, + "loss": 0.1061, + "num_input_tokens_seen": 1097728, + "step": 2235 + }, + { + "epoch": 0.29563151643130525, + "grad_norm": 300.6488952636719, + "learning_rate": 1.1818421747162838e-06, + "loss": 0.2662, + "num_input_tokens_seen": 1100096, + "step": 2240 + }, + { + "epoch": 0.2962914082090537, + "grad_norm": 173.1255340576172, + "learning_rate": 1.1844813935075218e-06, + "loss": 0.1855, + "num_input_tokens_seen": 1102400, + "step": 2245 + }, + { + "epoch": 0.29695129998680214, + "grad_norm": 0.08108215034008026, + "learning_rate": 1.1871206122987596e-06, + "loss": 0.0741, + "num_input_tokens_seen": 1104960, + "step": 2250 + }, + { + "epoch": 0.2976111917645506, + "grad_norm": 15.926095008850098, + "learning_rate": 1.1897598310899971e-06, + "loss": 0.2804, + "num_input_tokens_seen": 1107520, + "step": 2255 + }, + { + "epoch": 0.2982710835422991, + "grad_norm": 36.089237213134766, + "learning_rate": 1.1923990498812351e-06, + "loss": 0.2522, + "num_input_tokens_seen": 1109952, + "step": 2260 + }, + { + "epoch": 0.29893097532004753, + "grad_norm": 0.7445940971374512, + "learning_rate": 1.195038268672473e-06, + "loss": 0.0116, + "num_input_tokens_seen": 1112128, + "step": 2265 + }, + { + "epoch": 0.299590867097796, + "grad_norm": 0.1918533444404602, + "learning_rate": 1.1976774874637107e-06, + "loss": 0.0716, + "num_input_tokens_seen": 1114688, + "step": 2270 + }, + { + "epoch": 0.3002507588755444, + "grad_norm": 0.18674559891223907, + "learning_rate": 1.2003167062549485e-06, + "loss": 0.2035, + "num_input_tokens_seen": 1117248, + "step": 2275 + }, + { + "epoch": 0.30091065065329287, + "grad_norm": 101.24190521240234, + "learning_rate": 1.2029559250461862e-06, + "loss": 0.1032, + "num_input_tokens_seen": 1119808, + "step": 2280 + }, + { + "epoch": 0.3015705424310413, + "grad_norm": 376.71929931640625, + "learning_rate": 1.2055951438374242e-06, + "loss": 0.4005, + "num_input_tokens_seen": 1122112, + "step": 2285 + }, + { + "epoch": 0.30223043420878976, + "grad_norm": 1.0818003416061401, + "learning_rate": 1.208234362628662e-06, + "loss": 0.1585, + "num_input_tokens_seen": 1124544, + "step": 2290 + }, + { + "epoch": 0.3028903259865382, + "grad_norm": 0.4159611165523529, + "learning_rate": 1.2108735814198995e-06, + "loss": 0.1725, + "num_input_tokens_seen": 1127104, + "step": 2295 + }, + { + "epoch": 0.30355021776428665, + "grad_norm": 13.03470230102539, + "learning_rate": 1.2135128002111375e-06, + "loss": 0.1268, + "num_input_tokens_seen": 1129536, + "step": 2300 + }, + { + "epoch": 0.3042101095420351, + "grad_norm": 0.2850092053413391, + "learning_rate": 1.2161520190023753e-06, + "loss": 0.0557, + "num_input_tokens_seen": 1131840, + "step": 2305 + }, + { + "epoch": 0.30487000131978353, + "grad_norm": 2.2885732650756836, + "learning_rate": 1.2187912377936129e-06, + "loss": 0.128, + "num_input_tokens_seen": 1134336, + "step": 2310 + }, + { + "epoch": 0.305529893097532, + "grad_norm": 54.30757141113281, + "learning_rate": 1.2214304565848509e-06, + "loss": 0.3577, + "num_input_tokens_seen": 1136640, + "step": 2315 + }, + { + "epoch": 0.3061897848752805, + "grad_norm": 1.3606059551239014, + "learning_rate": 1.2240696753760886e-06, + "loss": 0.1164, + "num_input_tokens_seen": 1139136, + "step": 2320 + }, + { + "epoch": 0.3068496766530289, + "grad_norm": 0.1324460804462433, + "learning_rate": 1.2267088941673264e-06, + "loss": 0.0429, + "num_input_tokens_seen": 1141568, + "step": 2325 + }, + { + "epoch": 0.30750956843077737, + "grad_norm": 0.09382675588130951, + "learning_rate": 1.2293481129585642e-06, + "loss": 0.0024, + "num_input_tokens_seen": 1143936, + "step": 2330 + }, + { + "epoch": 0.3081694602085258, + "grad_norm": 76.82371520996094, + "learning_rate": 1.231987331749802e-06, + "loss": 0.0776, + "num_input_tokens_seen": 1146624, + "step": 2335 + }, + { + "epoch": 0.30882935198627426, + "grad_norm": 0.43480339646339417, + "learning_rate": 1.2346265505410397e-06, + "loss": 0.0591, + "num_input_tokens_seen": 1149120, + "step": 2340 + }, + { + "epoch": 0.3094892437640227, + "grad_norm": 0.028940560296177864, + "learning_rate": 1.2372657693322777e-06, + "loss": 0.1412, + "num_input_tokens_seen": 1151488, + "step": 2345 + }, + { + "epoch": 0.31014913554177115, + "grad_norm": 0.2386150062084198, + "learning_rate": 1.2399049881235153e-06, + "loss": 0.0975, + "num_input_tokens_seen": 1153600, + "step": 2350 + }, + { + "epoch": 0.3108090273195196, + "grad_norm": 0.01438100729137659, + "learning_rate": 1.2425442069147532e-06, + "loss": 0.1037, + "num_input_tokens_seen": 1156224, + "step": 2355 + }, + { + "epoch": 0.31146891909726804, + "grad_norm": 0.034385617822408676, + "learning_rate": 1.245183425705991e-06, + "loss": 0.1141, + "num_input_tokens_seen": 1158656, + "step": 2360 + }, + { + "epoch": 0.3121288108750165, + "grad_norm": 0.020433984696865082, + "learning_rate": 1.2478226444972288e-06, + "loss": 0.1503, + "num_input_tokens_seen": 1161408, + "step": 2365 + }, + { + "epoch": 0.3127887026527649, + "grad_norm": 0.3348312973976135, + "learning_rate": 1.2504618632884666e-06, + "loss": 0.0871, + "num_input_tokens_seen": 1163904, + "step": 2370 + }, + { + "epoch": 0.31344859443051337, + "grad_norm": 0.22757618129253387, + "learning_rate": 1.2531010820797043e-06, + "loss": 0.3747, + "num_input_tokens_seen": 1166208, + "step": 2375 + }, + { + "epoch": 0.3141084862082619, + "grad_norm": 1.5419764518737793, + "learning_rate": 1.2557403008709421e-06, + "loss": 0.1877, + "num_input_tokens_seen": 1168512, + "step": 2380 + }, + { + "epoch": 0.3147683779860103, + "grad_norm": 10.151047706604004, + "learning_rate": 1.25837951966218e-06, + "loss": 0.2214, + "num_input_tokens_seen": 1171072, + "step": 2385 + }, + { + "epoch": 0.31542826976375876, + "grad_norm": 0.7378384470939636, + "learning_rate": 1.2610187384534177e-06, + "loss": 0.0104, + "num_input_tokens_seen": 1173312, + "step": 2390 + }, + { + "epoch": 0.3160881615415072, + "grad_norm": 0.357771098613739, + "learning_rate": 1.2636579572446554e-06, + "loss": 0.1493, + "num_input_tokens_seen": 1175744, + "step": 2395 + }, + { + "epoch": 0.31674805331925565, + "grad_norm": 27.58684730529785, + "learning_rate": 1.2662971760358934e-06, + "loss": 0.2014, + "num_input_tokens_seen": 1178112, + "step": 2400 + }, + { + "epoch": 0.3174079450970041, + "grad_norm": 0.15001633763313293, + "learning_rate": 1.2689363948271312e-06, + "loss": 0.0012, + "num_input_tokens_seen": 1180352, + "step": 2405 + }, + { + "epoch": 0.31806783687475254, + "grad_norm": 0.09786776453256607, + "learning_rate": 1.2715756136183688e-06, + "loss": 0.1906, + "num_input_tokens_seen": 1182528, + "step": 2410 + }, + { + "epoch": 0.318727728652501, + "grad_norm": 0.4362722337245941, + "learning_rate": 1.2742148324096067e-06, + "loss": 0.018, + "num_input_tokens_seen": 1185280, + "step": 2415 + }, + { + "epoch": 0.31938762043024943, + "grad_norm": 27.211410522460938, + "learning_rate": 1.2768540512008445e-06, + "loss": 0.0975, + "num_input_tokens_seen": 1188032, + "step": 2420 + }, + { + "epoch": 0.3200475122079979, + "grad_norm": 20.800615310668945, + "learning_rate": 1.279493269992082e-06, + "loss": 0.2283, + "num_input_tokens_seen": 1190464, + "step": 2425 + }, + { + "epoch": 0.3207074039857463, + "grad_norm": 42.08029556274414, + "learning_rate": 1.28213248878332e-06, + "loss": 0.192, + "num_input_tokens_seen": 1192960, + "step": 2430 + }, + { + "epoch": 0.32136729576349476, + "grad_norm": 0.1869146227836609, + "learning_rate": 1.2847717075745578e-06, + "loss": 0.0019, + "num_input_tokens_seen": 1195072, + "step": 2435 + }, + { + "epoch": 0.3220271875412432, + "grad_norm": 0.02612818218767643, + "learning_rate": 1.2874109263657958e-06, + "loss": 0.0131, + "num_input_tokens_seen": 1197376, + "step": 2440 + }, + { + "epoch": 0.3226870793189917, + "grad_norm": 0.04093582555651665, + "learning_rate": 1.2900501451570334e-06, + "loss": 0.0088, + "num_input_tokens_seen": 1199936, + "step": 2445 + }, + { + "epoch": 0.32334697109674015, + "grad_norm": 187.89329528808594, + "learning_rate": 1.2926893639482712e-06, + "loss": 0.0404, + "num_input_tokens_seen": 1202368, + "step": 2450 + }, + { + "epoch": 0.3240068628744886, + "grad_norm": 0.03035602904856205, + "learning_rate": 1.2953285827395091e-06, + "loss": 0.0721, + "num_input_tokens_seen": 1204800, + "step": 2455 + }, + { + "epoch": 0.32466675465223704, + "grad_norm": 29.28488540649414, + "learning_rate": 1.297967801530747e-06, + "loss": 0.1065, + "num_input_tokens_seen": 1207552, + "step": 2460 + }, + { + "epoch": 0.3253266464299855, + "grad_norm": 0.03032883256673813, + "learning_rate": 1.3006070203219845e-06, + "loss": 0.2085, + "num_input_tokens_seen": 1209856, + "step": 2465 + }, + { + "epoch": 0.32598653820773393, + "grad_norm": 0.10402119904756546, + "learning_rate": 1.3032462391132225e-06, + "loss": 0.1298, + "num_input_tokens_seen": 1212352, + "step": 2470 + }, + { + "epoch": 0.3266464299854824, + "grad_norm": 0.1559586375951767, + "learning_rate": 1.3058854579044602e-06, + "loss": 0.0053, + "num_input_tokens_seen": 1214912, + "step": 2475 + }, + { + "epoch": 0.3273063217632308, + "grad_norm": 0.02763325348496437, + "learning_rate": 1.308524676695698e-06, + "loss": 0.0804, + "num_input_tokens_seen": 1217088, + "step": 2480 + }, + { + "epoch": 0.32796621354097927, + "grad_norm": 1.776210904121399, + "learning_rate": 1.3111638954869358e-06, + "loss": 0.1441, + "num_input_tokens_seen": 1219584, + "step": 2485 + }, + { + "epoch": 0.3286261053187277, + "grad_norm": 67.13372802734375, + "learning_rate": 1.3138031142781736e-06, + "loss": 0.1647, + "num_input_tokens_seen": 1222336, + "step": 2490 + }, + { + "epoch": 0.32928599709647616, + "grad_norm": 2.0714035034179688, + "learning_rate": 1.3164423330694115e-06, + "loss": 0.16, + "num_input_tokens_seen": 1224832, + "step": 2495 + }, + { + "epoch": 0.3299458888742246, + "grad_norm": 21.270225524902344, + "learning_rate": 1.3190815518606493e-06, + "loss": 0.3234, + "num_input_tokens_seen": 1227392, + "step": 2500 + }, + { + "epoch": 0.3306057806519731, + "grad_norm": 16.29952621459961, + "learning_rate": 1.3217207706518869e-06, + "loss": 0.1718, + "num_input_tokens_seen": 1229952, + "step": 2505 + }, + { + "epoch": 0.33126567242972155, + "grad_norm": 1.428592324256897, + "learning_rate": 1.3243599894431249e-06, + "loss": 0.0817, + "num_input_tokens_seen": 1232640, + "step": 2510 + }, + { + "epoch": 0.33192556420747, + "grad_norm": 163.8802490234375, + "learning_rate": 1.3269992082343626e-06, + "loss": 0.0409, + "num_input_tokens_seen": 1235200, + "step": 2515 + }, + { + "epoch": 0.33258545598521844, + "grad_norm": 0.16034527122974396, + "learning_rate": 1.3296384270256002e-06, + "loss": 0.0289, + "num_input_tokens_seen": 1237632, + "step": 2520 + }, + { + "epoch": 0.3332453477629669, + "grad_norm": 0.22546841204166412, + "learning_rate": 1.3322776458168382e-06, + "loss": 0.1123, + "num_input_tokens_seen": 1239936, + "step": 2525 + }, + { + "epoch": 0.3339052395407153, + "grad_norm": 0.36573493480682373, + "learning_rate": 1.334916864608076e-06, + "loss": 0.2255, + "num_input_tokens_seen": 1242304, + "step": 2530 + }, + { + "epoch": 0.33456513131846377, + "grad_norm": 0.039298903197050095, + "learning_rate": 1.3375560833993137e-06, + "loss": 0.0004, + "num_input_tokens_seen": 1244736, + "step": 2535 + }, + { + "epoch": 0.3352250230962122, + "grad_norm": 1.0564583539962769, + "learning_rate": 1.3401953021905515e-06, + "loss": 0.2463, + "num_input_tokens_seen": 1247488, + "step": 2540 + }, + { + "epoch": 0.33588491487396066, + "grad_norm": 0.1800013780593872, + "learning_rate": 1.3428345209817893e-06, + "loss": 0.1187, + "num_input_tokens_seen": 1250048, + "step": 2545 + }, + { + "epoch": 0.3365448066517091, + "grad_norm": 191.12794494628906, + "learning_rate": 1.345473739773027e-06, + "loss": 0.2226, + "num_input_tokens_seen": 1252928, + "step": 2550 + }, + { + "epoch": 0.33720469842945755, + "grad_norm": 0.26806724071502686, + "learning_rate": 1.348112958564265e-06, + "loss": 0.2176, + "num_input_tokens_seen": 1255488, + "step": 2555 + }, + { + "epoch": 0.337864590207206, + "grad_norm": 52.88833236694336, + "learning_rate": 1.3507521773555026e-06, + "loss": 0.4771, + "num_input_tokens_seen": 1257984, + "step": 2560 + }, + { + "epoch": 0.3385244819849545, + "grad_norm": 0.3574354648590088, + "learning_rate": 1.3533913961467406e-06, + "loss": 0.1681, + "num_input_tokens_seen": 1260416, + "step": 2565 + }, + { + "epoch": 0.33918437376270294, + "grad_norm": 48.450592041015625, + "learning_rate": 1.3560306149379783e-06, + "loss": 0.1162, + "num_input_tokens_seen": 1263168, + "step": 2570 + }, + { + "epoch": 0.3398442655404514, + "grad_norm": 0.33409392833709717, + "learning_rate": 1.3586698337292161e-06, + "loss": 0.1092, + "num_input_tokens_seen": 1265600, + "step": 2575 + }, + { + "epoch": 0.34050415731819983, + "grad_norm": 0.06134882941842079, + "learning_rate": 1.361309052520454e-06, + "loss": 0.0017, + "num_input_tokens_seen": 1268032, + "step": 2580 + }, + { + "epoch": 0.3411640490959483, + "grad_norm": 205.01483154296875, + "learning_rate": 1.3639482713116917e-06, + "loss": 0.124, + "num_input_tokens_seen": 1270336, + "step": 2585 + }, + { + "epoch": 0.3418239408736967, + "grad_norm": 27.653396606445312, + "learning_rate": 1.3665874901029294e-06, + "loss": 0.0837, + "num_input_tokens_seen": 1273088, + "step": 2590 + }, + { + "epoch": 0.34248383265144516, + "grad_norm": 0.3572693467140198, + "learning_rate": 1.3692267088941674e-06, + "loss": 0.2435, + "num_input_tokens_seen": 1275456, + "step": 2595 + }, + { + "epoch": 0.3431437244291936, + "grad_norm": 0.15710730850696564, + "learning_rate": 1.371865927685405e-06, + "loss": 0.0428, + "num_input_tokens_seen": 1277888, + "step": 2600 + }, + { + "epoch": 0.34380361620694205, + "grad_norm": 2.2360987663269043, + "learning_rate": 1.3745051464766428e-06, + "loss": 0.0011, + "num_input_tokens_seen": 1280384, + "step": 2605 + }, + { + "epoch": 0.3444635079846905, + "grad_norm": 0.7117704749107361, + "learning_rate": 1.3771443652678807e-06, + "loss": 0.0331, + "num_input_tokens_seen": 1282944, + "step": 2610 + }, + { + "epoch": 0.34512339976243894, + "grad_norm": 147.02932739257812, + "learning_rate": 1.3797835840591185e-06, + "loss": 0.1367, + "num_input_tokens_seen": 1285184, + "step": 2615 + }, + { + "epoch": 0.3457832915401874, + "grad_norm": 0.08217495679855347, + "learning_rate": 1.382422802850356e-06, + "loss": 0.202, + "num_input_tokens_seen": 1287808, + "step": 2620 + }, + { + "epoch": 0.34644318331793583, + "grad_norm": 1.0039873123168945, + "learning_rate": 1.385062021641594e-06, + "loss": 0.1634, + "num_input_tokens_seen": 1290432, + "step": 2625 + }, + { + "epoch": 0.34710307509568433, + "grad_norm": 0.36813926696777344, + "learning_rate": 1.3877012404328318e-06, + "loss": 0.1988, + "num_input_tokens_seen": 1292736, + "step": 2630 + }, + { + "epoch": 0.3477629668734328, + "grad_norm": 0.5801262259483337, + "learning_rate": 1.3903404592240694e-06, + "loss": 0.1405, + "num_input_tokens_seen": 1295104, + "step": 2635 + }, + { + "epoch": 0.3484228586511812, + "grad_norm": 0.835560142993927, + "learning_rate": 1.3929796780153074e-06, + "loss": 0.0939, + "num_input_tokens_seen": 1297600, + "step": 2640 + }, + { + "epoch": 0.34908275042892967, + "grad_norm": 0.08413344621658325, + "learning_rate": 1.3956188968065452e-06, + "loss": 0.0013, + "num_input_tokens_seen": 1300224, + "step": 2645 + }, + { + "epoch": 0.3497426422066781, + "grad_norm": 0.3069940209388733, + "learning_rate": 1.3982581155977831e-06, + "loss": 0.1809, + "num_input_tokens_seen": 1302528, + "step": 2650 + }, + { + "epoch": 0.35040253398442656, + "grad_norm": 0.15991085767745972, + "learning_rate": 1.4008973343890207e-06, + "loss": 0.2074, + "num_input_tokens_seen": 1305024, + "step": 2655 + }, + { + "epoch": 0.351062425762175, + "grad_norm": 0.2440817505121231, + "learning_rate": 1.4035365531802585e-06, + "loss": 0.2161, + "num_input_tokens_seen": 1307392, + "step": 2660 + }, + { + "epoch": 0.35172231753992345, + "grad_norm": 0.7089338302612305, + "learning_rate": 1.4061757719714965e-06, + "loss": 0.2352, + "num_input_tokens_seen": 1310016, + "step": 2665 + }, + { + "epoch": 0.3523822093176719, + "grad_norm": 0.12224601209163666, + "learning_rate": 1.4088149907627342e-06, + "loss": 0.0256, + "num_input_tokens_seen": 1312576, + "step": 2670 + }, + { + "epoch": 0.35304210109542034, + "grad_norm": 0.06766802072525024, + "learning_rate": 1.4114542095539718e-06, + "loss": 0.1432, + "num_input_tokens_seen": 1315072, + "step": 2675 + }, + { + "epoch": 0.3537019928731688, + "grad_norm": 47.305416107177734, + "learning_rate": 1.4140934283452098e-06, + "loss": 0.1524, + "num_input_tokens_seen": 1317504, + "step": 2680 + }, + { + "epoch": 0.3543618846509172, + "grad_norm": 0.1390659511089325, + "learning_rate": 1.4167326471364476e-06, + "loss": 0.001, + "num_input_tokens_seen": 1319680, + "step": 2685 + }, + { + "epoch": 0.3550217764286657, + "grad_norm": 0.08814946562051773, + "learning_rate": 1.4193718659276853e-06, + "loss": 0.0008, + "num_input_tokens_seen": 1322240, + "step": 2690 + }, + { + "epoch": 0.35568166820641417, + "grad_norm": 18.944007873535156, + "learning_rate": 1.4220110847189231e-06, + "loss": 0.1094, + "num_input_tokens_seen": 1324736, + "step": 2695 + }, + { + "epoch": 0.3563415599841626, + "grad_norm": 0.02471146546304226, + "learning_rate": 1.4246503035101609e-06, + "loss": 0.1183, + "num_input_tokens_seen": 1327104, + "step": 2700 + }, + { + "epoch": 0.35700145176191106, + "grad_norm": 28.54195213317871, + "learning_rate": 1.4272895223013989e-06, + "loss": 0.2524, + "num_input_tokens_seen": 1329920, + "step": 2705 + }, + { + "epoch": 0.3576613435396595, + "grad_norm": 0.09029418230056763, + "learning_rate": 1.4299287410926366e-06, + "loss": 0.1706, + "num_input_tokens_seen": 1332352, + "step": 2710 + }, + { + "epoch": 0.35832123531740795, + "grad_norm": 32.86302185058594, + "learning_rate": 1.4325679598838742e-06, + "loss": 0.0205, + "num_input_tokens_seen": 1335040, + "step": 2715 + }, + { + "epoch": 0.3589811270951564, + "grad_norm": 0.2393476665019989, + "learning_rate": 1.4352071786751122e-06, + "loss": 0.003, + "num_input_tokens_seen": 1337472, + "step": 2720 + }, + { + "epoch": 0.35964101887290484, + "grad_norm": 77.94792938232422, + "learning_rate": 1.43784639746635e-06, + "loss": 0.0809, + "num_input_tokens_seen": 1339712, + "step": 2725 + }, + { + "epoch": 0.3603009106506533, + "grad_norm": 0.08134565502405167, + "learning_rate": 1.4404856162575877e-06, + "loss": 0.0011, + "num_input_tokens_seen": 1342144, + "step": 2730 + }, + { + "epoch": 0.36096080242840173, + "grad_norm": 0.4735080897808075, + "learning_rate": 1.4431248350488255e-06, + "loss": 0.0817, + "num_input_tokens_seen": 1344512, + "step": 2735 + }, + { + "epoch": 0.3616206942061502, + "grad_norm": 395.6458435058594, + "learning_rate": 1.4457640538400633e-06, + "loss": 0.4208, + "num_input_tokens_seen": 1346880, + "step": 2740 + }, + { + "epoch": 0.3622805859838986, + "grad_norm": 11.863261222839355, + "learning_rate": 1.448403272631301e-06, + "loss": 0.0683, + "num_input_tokens_seen": 1349056, + "step": 2745 + }, + { + "epoch": 0.36294047776164706, + "grad_norm": 166.0413360595703, + "learning_rate": 1.4510424914225388e-06, + "loss": 0.1806, + "num_input_tokens_seen": 1351488, + "step": 2750 + }, + { + "epoch": 0.36360036953939556, + "grad_norm": 0.06676194816827774, + "learning_rate": 1.4536817102137766e-06, + "loss": 0.0011, + "num_input_tokens_seen": 1353920, + "step": 2755 + }, + { + "epoch": 0.364260261317144, + "grad_norm": 0.4566335678100586, + "learning_rate": 1.4563209290050144e-06, + "loss": 0.3443, + "num_input_tokens_seen": 1356352, + "step": 2760 + }, + { + "epoch": 0.36492015309489245, + "grad_norm": 0.3084438443183899, + "learning_rate": 1.4589601477962524e-06, + "loss": 0.2605, + "num_input_tokens_seen": 1359104, + "step": 2765 + }, + { + "epoch": 0.3655800448726409, + "grad_norm": 38.383583068847656, + "learning_rate": 1.46159936658749e-06, + "loss": 0.2077, + "num_input_tokens_seen": 1361536, + "step": 2770 + }, + { + "epoch": 0.36623993665038934, + "grad_norm": 0.5150312781333923, + "learning_rate": 1.464238585378728e-06, + "loss": 0.3646, + "num_input_tokens_seen": 1364160, + "step": 2775 + }, + { + "epoch": 0.3668998284281378, + "grad_norm": 6.813530445098877, + "learning_rate": 1.4668778041699657e-06, + "loss": 0.0023, + "num_input_tokens_seen": 1366656, + "step": 2780 + }, + { + "epoch": 0.36755972020588623, + "grad_norm": 40.210357666015625, + "learning_rate": 1.4695170229612034e-06, + "loss": 0.142, + "num_input_tokens_seen": 1369216, + "step": 2785 + }, + { + "epoch": 0.3682196119836347, + "grad_norm": 0.21548590064048767, + "learning_rate": 1.4721562417524412e-06, + "loss": 0.2354, + "num_input_tokens_seen": 1371712, + "step": 2790 + }, + { + "epoch": 0.3688795037613831, + "grad_norm": 0.28355929255485535, + "learning_rate": 1.474795460543679e-06, + "loss": 0.1816, + "num_input_tokens_seen": 1374080, + "step": 2795 + }, + { + "epoch": 0.36953939553913157, + "grad_norm": 1.1086498498916626, + "learning_rate": 1.4774346793349168e-06, + "loss": 0.1143, + "num_input_tokens_seen": 1377024, + "step": 2800 + }, + { + "epoch": 0.37019928731688, + "grad_norm": 0.7446885704994202, + "learning_rate": 1.4800738981261548e-06, + "loss": 0.122, + "num_input_tokens_seen": 1379392, + "step": 2805 + }, + { + "epoch": 0.37085917909462845, + "grad_norm": 0.14264525473117828, + "learning_rate": 1.4827131169173923e-06, + "loss": 0.1612, + "num_input_tokens_seen": 1381760, + "step": 2810 + }, + { + "epoch": 0.37151907087237696, + "grad_norm": 0.19203917682170868, + "learning_rate": 1.48535233570863e-06, + "loss": 0.0652, + "num_input_tokens_seen": 1384128, + "step": 2815 + }, + { + "epoch": 0.3721789626501254, + "grad_norm": 0.036427125334739685, + "learning_rate": 1.487991554499868e-06, + "loss": 0.053, + "num_input_tokens_seen": 1386496, + "step": 2820 + }, + { + "epoch": 0.37283885442787384, + "grad_norm": 273.5498046875, + "learning_rate": 1.4906307732911058e-06, + "loss": 0.1133, + "num_input_tokens_seen": 1388736, + "step": 2825 + }, + { + "epoch": 0.3734987462056223, + "grad_norm": 95.69959259033203, + "learning_rate": 1.4932699920823434e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1391040, + "step": 2830 + }, + { + "epoch": 0.37415863798337073, + "grad_norm": 93.49053955078125, + "learning_rate": 1.4959092108735814e-06, + "loss": 0.093, + "num_input_tokens_seen": 1393344, + "step": 2835 + }, + { + "epoch": 0.3748185297611192, + "grad_norm": 1.0124017000198364, + "learning_rate": 1.4985484296648192e-06, + "loss": 0.074, + "num_input_tokens_seen": 1395648, + "step": 2840 + }, + { + "epoch": 0.3754784215388676, + "grad_norm": 0.015571318566799164, + "learning_rate": 1.5011876484560567e-06, + "loss": 0.2042, + "num_input_tokens_seen": 1398272, + "step": 2845 + }, + { + "epoch": 0.37613831331661607, + "grad_norm": 0.0427069365978241, + "learning_rate": 1.5038268672472947e-06, + "loss": 0.2285, + "num_input_tokens_seen": 1400576, + "step": 2850 + }, + { + "epoch": 0.3767982050943645, + "grad_norm": 0.03314981982111931, + "learning_rate": 1.5064660860385325e-06, + "loss": 0.049, + "num_input_tokens_seen": 1403136, + "step": 2855 + }, + { + "epoch": 0.37745809687211296, + "grad_norm": 0.028006955981254578, + "learning_rate": 1.5091053048297705e-06, + "loss": 0.0759, + "num_input_tokens_seen": 1405504, + "step": 2860 + }, + { + "epoch": 0.3781179886498614, + "grad_norm": 31.784631729125977, + "learning_rate": 1.511744523621008e-06, + "loss": 0.2051, + "num_input_tokens_seen": 1407808, + "step": 2865 + }, + { + "epoch": 0.37877788042760985, + "grad_norm": 79.9329605102539, + "learning_rate": 1.5143837424122458e-06, + "loss": 0.1983, + "num_input_tokens_seen": 1410304, + "step": 2870 + }, + { + "epoch": 0.37943777220535835, + "grad_norm": 0.17618303000926971, + "learning_rate": 1.5170229612034838e-06, + "loss": 0.0052, + "num_input_tokens_seen": 1412736, + "step": 2875 + }, + { + "epoch": 0.3800976639831068, + "grad_norm": 70.05154418945312, + "learning_rate": 1.5196621799947216e-06, + "loss": 0.1043, + "num_input_tokens_seen": 1415488, + "step": 2880 + }, + { + "epoch": 0.38075755576085524, + "grad_norm": 0.05183727666735649, + "learning_rate": 1.5223013987859591e-06, + "loss": 0.148, + "num_input_tokens_seen": 1417856, + "step": 2885 + }, + { + "epoch": 0.3814174475386037, + "grad_norm": 161.5440673828125, + "learning_rate": 1.5249406175771971e-06, + "loss": 0.1376, + "num_input_tokens_seen": 1420096, + "step": 2890 + }, + { + "epoch": 0.3820773393163521, + "grad_norm": 38.89706039428711, + "learning_rate": 1.5275798363684349e-06, + "loss": 0.1461, + "num_input_tokens_seen": 1422656, + "step": 2895 + }, + { + "epoch": 0.38273723109410057, + "grad_norm": 55.588035583496094, + "learning_rate": 1.5302190551596727e-06, + "loss": 0.1672, + "num_input_tokens_seen": 1425152, + "step": 2900 + }, + { + "epoch": 0.383397122871849, + "grad_norm": 0.15920549631118774, + "learning_rate": 1.5328582739509104e-06, + "loss": 0.0423, + "num_input_tokens_seen": 1427840, + "step": 2905 + }, + { + "epoch": 0.38405701464959746, + "grad_norm": 0.49687254428863525, + "learning_rate": 1.5354974927421482e-06, + "loss": 0.0218, + "num_input_tokens_seen": 1430144, + "step": 2910 + }, + { + "epoch": 0.3847169064273459, + "grad_norm": 0.13978274166584015, + "learning_rate": 1.5381367115333862e-06, + "loss": 0.1745, + "num_input_tokens_seen": 1432448, + "step": 2915 + }, + { + "epoch": 0.38537679820509435, + "grad_norm": 0.2328559309244156, + "learning_rate": 1.540775930324624e-06, + "loss": 0.0026, + "num_input_tokens_seen": 1434752, + "step": 2920 + }, + { + "epoch": 0.3860366899828428, + "grad_norm": 1.778712511062622, + "learning_rate": 1.5434151491158615e-06, + "loss": 0.001, + "num_input_tokens_seen": 1437184, + "step": 2925 + }, + { + "epoch": 0.38669658176059124, + "grad_norm": 0.156612828373909, + "learning_rate": 1.5460543679070995e-06, + "loss": 0.051, + "num_input_tokens_seen": 1440000, + "step": 2930 + }, + { + "epoch": 0.3873564735383397, + "grad_norm": 0.6022273898124695, + "learning_rate": 1.5486935866983373e-06, + "loss": 0.0864, + "num_input_tokens_seen": 1442624, + "step": 2935 + }, + { + "epoch": 0.3880163653160882, + "grad_norm": 0.10904964804649353, + "learning_rate": 1.551332805489575e-06, + "loss": 0.291, + "num_input_tokens_seen": 1445184, + "step": 2940 + }, + { + "epoch": 0.38867625709383663, + "grad_norm": 0.8670811653137207, + "learning_rate": 1.5539720242808128e-06, + "loss": 0.2756, + "num_input_tokens_seen": 1447616, + "step": 2945 + }, + { + "epoch": 0.3893361488715851, + "grad_norm": 1.617418646812439, + "learning_rate": 1.5566112430720506e-06, + "loss": 0.0006, + "num_input_tokens_seen": 1450176, + "step": 2950 + }, + { + "epoch": 0.3899960406493335, + "grad_norm": 101.99866485595703, + "learning_rate": 1.5592504618632884e-06, + "loss": 0.4024, + "num_input_tokens_seen": 1452672, + "step": 2955 + }, + { + "epoch": 0.39065593242708196, + "grad_norm": 116.28324890136719, + "learning_rate": 1.5618896806545262e-06, + "loss": 0.2948, + "num_input_tokens_seen": 1455232, + "step": 2960 + }, + { + "epoch": 0.3913158242048304, + "grad_norm": 67.55413818359375, + "learning_rate": 1.564528899445764e-06, + "loss": 0.2825, + "num_input_tokens_seen": 1457792, + "step": 2965 + }, + { + "epoch": 0.39197571598257885, + "grad_norm": 1.1112959384918213, + "learning_rate": 1.5671681182370017e-06, + "loss": 0.0889, + "num_input_tokens_seen": 1460160, + "step": 2970 + }, + { + "epoch": 0.3926356077603273, + "grad_norm": 4.917082786560059, + "learning_rate": 1.5698073370282397e-06, + "loss": 0.074, + "num_input_tokens_seen": 1462400, + "step": 2975 + }, + { + "epoch": 0.39329549953807574, + "grad_norm": 1.6990487575531006, + "learning_rate": 1.5724465558194772e-06, + "loss": 0.128, + "num_input_tokens_seen": 1465088, + "step": 2980 + }, + { + "epoch": 0.3939553913158242, + "grad_norm": 2.347425699234009, + "learning_rate": 1.5750857746107152e-06, + "loss": 0.1378, + "num_input_tokens_seen": 1467456, + "step": 2985 + }, + { + "epoch": 0.39461528309357263, + "grad_norm": 0.054718267172575, + "learning_rate": 1.577724993401953e-06, + "loss": 0.0013, + "num_input_tokens_seen": 1470016, + "step": 2990 + }, + { + "epoch": 0.3952751748713211, + "grad_norm": 0.06302514672279358, + "learning_rate": 1.5803642121931908e-06, + "loss": 0.0779, + "num_input_tokens_seen": 1472448, + "step": 2995 + }, + { + "epoch": 0.3959350666490696, + "grad_norm": 0.8244885802268982, + "learning_rate": 1.5830034309844285e-06, + "loss": 0.0693, + "num_input_tokens_seen": 1475072, + "step": 3000 + }, + { + "epoch": 0.396594958426818, + "grad_norm": 0.21904513239860535, + "learning_rate": 1.5856426497756663e-06, + "loss": 0.063, + "num_input_tokens_seen": 1477440, + "step": 3005 + }, + { + "epoch": 0.39725485020456647, + "grad_norm": 0.9207690358161926, + "learning_rate": 1.588281868566904e-06, + "loss": 0.0011, + "num_input_tokens_seen": 1479872, + "step": 3010 + }, + { + "epoch": 0.3979147419823149, + "grad_norm": 125.98383331298828, + "learning_rate": 1.590921087358142e-06, + "loss": 0.2275, + "num_input_tokens_seen": 1481920, + "step": 3015 + }, + { + "epoch": 0.39857463376006336, + "grad_norm": 0.06044726446270943, + "learning_rate": 1.5935603061493796e-06, + "loss": 0.2769, + "num_input_tokens_seen": 1484992, + "step": 3020 + }, + { + "epoch": 0.3992345255378118, + "grad_norm": 0.15632975101470947, + "learning_rate": 1.5961995249406174e-06, + "loss": 0.3505, + "num_input_tokens_seen": 1487232, + "step": 3025 + }, + { + "epoch": 0.39989441731556025, + "grad_norm": 217.93968200683594, + "learning_rate": 1.5988387437318554e-06, + "loss": 0.19, + "num_input_tokens_seen": 1489728, + "step": 3030 + }, + { + "epoch": 0.4005543090933087, + "grad_norm": 6.839831352233887, + "learning_rate": 1.6014779625230932e-06, + "loss": 0.2316, + "num_input_tokens_seen": 1491968, + "step": 3035 + }, + { + "epoch": 0.40121420087105714, + "grad_norm": 93.5851821899414, + "learning_rate": 1.6041171813143307e-06, + "loss": 0.1632, + "num_input_tokens_seen": 1494336, + "step": 3040 + }, + { + "epoch": 0.4018740926488056, + "grad_norm": 0.35064733028411865, + "learning_rate": 1.6067564001055687e-06, + "loss": 0.1404, + "num_input_tokens_seen": 1497472, + "step": 3045 + }, + { + "epoch": 0.402533984426554, + "grad_norm": 0.19418714940547943, + "learning_rate": 1.6093956188968065e-06, + "loss": 0.085, + "num_input_tokens_seen": 1499968, + "step": 3050 + }, + { + "epoch": 0.40319387620430247, + "grad_norm": 0.04916776716709137, + "learning_rate": 1.612034837688044e-06, + "loss": 0.0344, + "num_input_tokens_seen": 1502336, + "step": 3055 + }, + { + "epoch": 0.40385376798205097, + "grad_norm": 35.16215896606445, + "learning_rate": 1.614674056479282e-06, + "loss": 0.2477, + "num_input_tokens_seen": 1504576, + "step": 3060 + }, + { + "epoch": 0.4045136597597994, + "grad_norm": 78.75347900390625, + "learning_rate": 1.6173132752705198e-06, + "loss": 0.2589, + "num_input_tokens_seen": 1507136, + "step": 3065 + }, + { + "epoch": 0.40517355153754786, + "grad_norm": 1.299813985824585, + "learning_rate": 1.6199524940617578e-06, + "loss": 0.0691, + "num_input_tokens_seen": 1509696, + "step": 3070 + }, + { + "epoch": 0.4058334433152963, + "grad_norm": 0.5724449157714844, + "learning_rate": 1.6225917128529954e-06, + "loss": 0.2528, + "num_input_tokens_seen": 1512192, + "step": 3075 + }, + { + "epoch": 0.40649333509304475, + "grad_norm": 0.24407175183296204, + "learning_rate": 1.6252309316442331e-06, + "loss": 0.0691, + "num_input_tokens_seen": 1514752, + "step": 3080 + }, + { + "epoch": 0.4071532268707932, + "grad_norm": 34.682369232177734, + "learning_rate": 1.6278701504354711e-06, + "loss": 0.3173, + "num_input_tokens_seen": 1517440, + "step": 3085 + }, + { + "epoch": 0.40781311864854164, + "grad_norm": 0.07574780285358429, + "learning_rate": 1.6305093692267089e-06, + "loss": 0.0856, + "num_input_tokens_seen": 1519744, + "step": 3090 + }, + { + "epoch": 0.4084730104262901, + "grad_norm": 338.62677001953125, + "learning_rate": 1.6331485880179465e-06, + "loss": 0.1111, + "num_input_tokens_seen": 1522176, + "step": 3095 + }, + { + "epoch": 0.40913290220403853, + "grad_norm": 0.1823023557662964, + "learning_rate": 1.6357878068091844e-06, + "loss": 0.0694, + "num_input_tokens_seen": 1524352, + "step": 3100 + }, + { + "epoch": 0.409792793981787, + "grad_norm": 67.96218872070312, + "learning_rate": 1.6384270256004222e-06, + "loss": 0.2666, + "num_input_tokens_seen": 1526976, + "step": 3105 + }, + { + "epoch": 0.4104526857595354, + "grad_norm": 0.4283585548400879, + "learning_rate": 1.64106624439166e-06, + "loss": 0.1743, + "num_input_tokens_seen": 1529152, + "step": 3110 + }, + { + "epoch": 0.41111257753728386, + "grad_norm": 0.08526215702295303, + "learning_rate": 1.6437054631828978e-06, + "loss": 0.0033, + "num_input_tokens_seen": 1531648, + "step": 3115 + }, + { + "epoch": 0.4117724693150323, + "grad_norm": 0.20778554677963257, + "learning_rate": 1.6463446819741355e-06, + "loss": 0.1017, + "num_input_tokens_seen": 1533952, + "step": 3120 + }, + { + "epoch": 0.4124323610927808, + "grad_norm": 92.7779769897461, + "learning_rate": 1.6489839007653735e-06, + "loss": 0.294, + "num_input_tokens_seen": 1536640, + "step": 3125 + }, + { + "epoch": 0.41309225287052925, + "grad_norm": 0.7637858986854553, + "learning_rate": 1.6516231195566113e-06, + "loss": 0.0033, + "num_input_tokens_seen": 1539200, + "step": 3130 + }, + { + "epoch": 0.4137521446482777, + "grad_norm": 7.557998180389404, + "learning_rate": 1.6542623383478489e-06, + "loss": 0.1664, + "num_input_tokens_seen": 1541632, + "step": 3135 + }, + { + "epoch": 0.41441203642602614, + "grad_norm": 36.74544906616211, + "learning_rate": 1.6569015571390868e-06, + "loss": 0.0619, + "num_input_tokens_seen": 1544128, + "step": 3140 + }, + { + "epoch": 0.4150719282037746, + "grad_norm": 0.06126031652092934, + "learning_rate": 1.6595407759303246e-06, + "loss": 0.0231, + "num_input_tokens_seen": 1546368, + "step": 3145 + }, + { + "epoch": 0.41573181998152303, + "grad_norm": 1.9612305164337158, + "learning_rate": 1.6621799947215624e-06, + "loss": 0.4345, + "num_input_tokens_seen": 1548736, + "step": 3150 + }, + { + "epoch": 0.4163917117592715, + "grad_norm": 0.30381351709365845, + "learning_rate": 1.6648192135128002e-06, + "loss": 0.2605, + "num_input_tokens_seen": 1551168, + "step": 3155 + }, + { + "epoch": 0.4170516035370199, + "grad_norm": 0.25879448652267456, + "learning_rate": 1.667458432304038e-06, + "loss": 0.1204, + "num_input_tokens_seen": 1553664, + "step": 3160 + }, + { + "epoch": 0.41771149531476837, + "grad_norm": 109.06922149658203, + "learning_rate": 1.6700976510952757e-06, + "loss": 0.1409, + "num_input_tokens_seen": 1555968, + "step": 3165 + }, + { + "epoch": 0.4183713870925168, + "grad_norm": 45.09346389770508, + "learning_rate": 1.6727368698865135e-06, + "loss": 0.064, + "num_input_tokens_seen": 1558208, + "step": 3170 + }, + { + "epoch": 0.41903127887026526, + "grad_norm": 1.3239551782608032, + "learning_rate": 1.6753760886777513e-06, + "loss": 0.0094, + "num_input_tokens_seen": 1560640, + "step": 3175 + }, + { + "epoch": 0.4196911706480137, + "grad_norm": 2.044196128845215, + "learning_rate": 1.678015307468989e-06, + "loss": 0.0375, + "num_input_tokens_seen": 1563328, + "step": 3180 + }, + { + "epoch": 0.4203510624257622, + "grad_norm": 0.24516402184963226, + "learning_rate": 1.680654526260227e-06, + "loss": 0.1175, + "num_input_tokens_seen": 1565696, + "step": 3185 + }, + { + "epoch": 0.42101095420351065, + "grad_norm": 0.20431405305862427, + "learning_rate": 1.6832937450514646e-06, + "loss": 0.4013, + "num_input_tokens_seen": 1568640, + "step": 3190 + }, + { + "epoch": 0.4216708459812591, + "grad_norm": 9.456520080566406, + "learning_rate": 1.6859329638427023e-06, + "loss": 0.0542, + "num_input_tokens_seen": 1571328, + "step": 3195 + }, + { + "epoch": 0.42233073775900754, + "grad_norm": 26.781938552856445, + "learning_rate": 1.6885721826339403e-06, + "loss": 0.5376, + "num_input_tokens_seen": 1574016, + "step": 3200 + }, + { + "epoch": 0.422990629536756, + "grad_norm": 37.978271484375, + "learning_rate": 1.691211401425178e-06, + "loss": 0.2516, + "num_input_tokens_seen": 1576384, + "step": 3205 + }, + { + "epoch": 0.4236505213145044, + "grad_norm": 0.1549421101808548, + "learning_rate": 1.6938506202164159e-06, + "loss": 0.217, + "num_input_tokens_seen": 1579200, + "step": 3210 + }, + { + "epoch": 0.42431041309225287, + "grad_norm": 54.167076110839844, + "learning_rate": 1.6964898390076536e-06, + "loss": 0.1522, + "num_input_tokens_seen": 1581632, + "step": 3215 + }, + { + "epoch": 0.4249703048700013, + "grad_norm": 1.0169860124588013, + "learning_rate": 1.6991290577988914e-06, + "loss": 0.0283, + "num_input_tokens_seen": 1584064, + "step": 3220 + }, + { + "epoch": 0.42563019664774976, + "grad_norm": 52.26798629760742, + "learning_rate": 1.7017682765901294e-06, + "loss": 0.0425, + "num_input_tokens_seen": 1586752, + "step": 3225 + }, + { + "epoch": 0.4262900884254982, + "grad_norm": 19.18907356262207, + "learning_rate": 1.704407495381367e-06, + "loss": 0.1471, + "num_input_tokens_seen": 1589248, + "step": 3230 + }, + { + "epoch": 0.42694998020324665, + "grad_norm": 0.09750813245773315, + "learning_rate": 1.7070467141726047e-06, + "loss": 0.0052, + "num_input_tokens_seen": 1591552, + "step": 3235 + }, + { + "epoch": 0.4276098719809951, + "grad_norm": 1.1829841136932373, + "learning_rate": 1.7096859329638427e-06, + "loss": 0.0685, + "num_input_tokens_seen": 1594048, + "step": 3240 + }, + { + "epoch": 0.4282697637587436, + "grad_norm": 74.45574188232422, + "learning_rate": 1.7123251517550805e-06, + "loss": 0.2108, + "num_input_tokens_seen": 1596544, + "step": 3245 + }, + { + "epoch": 0.42892965553649204, + "grad_norm": 41.79922866821289, + "learning_rate": 1.714964370546318e-06, + "loss": 0.2884, + "num_input_tokens_seen": 1599104, + "step": 3250 + }, + { + "epoch": 0.4295895473142405, + "grad_norm": 0.04245501011610031, + "learning_rate": 1.717603589337556e-06, + "loss": 0.1717, + "num_input_tokens_seen": 1601344, + "step": 3255 + }, + { + "epoch": 0.43024943909198893, + "grad_norm": 22.026790618896484, + "learning_rate": 1.7202428081287938e-06, + "loss": 0.0818, + "num_input_tokens_seen": 1603968, + "step": 3260 + }, + { + "epoch": 0.4309093308697374, + "grad_norm": 61.95907211303711, + "learning_rate": 1.7228820269200314e-06, + "loss": 0.0261, + "num_input_tokens_seen": 1606400, + "step": 3265 + }, + { + "epoch": 0.4315692226474858, + "grad_norm": 258.6390380859375, + "learning_rate": 1.7255212457112694e-06, + "loss": 0.2814, + "num_input_tokens_seen": 1608960, + "step": 3270 + }, + { + "epoch": 0.43222911442523426, + "grad_norm": 0.6005437970161438, + "learning_rate": 1.7281604645025071e-06, + "loss": 0.0021, + "num_input_tokens_seen": 1611456, + "step": 3275 + }, + { + "epoch": 0.4328890062029827, + "grad_norm": 0.05120214447379112, + "learning_rate": 1.7307996832937451e-06, + "loss": 0.0595, + "num_input_tokens_seen": 1613952, + "step": 3280 + }, + { + "epoch": 0.43354889798073115, + "grad_norm": 30.487438201904297, + "learning_rate": 1.7334389020849827e-06, + "loss": 0.1862, + "num_input_tokens_seen": 1616320, + "step": 3285 + }, + { + "epoch": 0.4342087897584796, + "grad_norm": 0.9038780927658081, + "learning_rate": 1.7360781208762205e-06, + "loss": 0.3119, + "num_input_tokens_seen": 1619136, + "step": 3290 + }, + { + "epoch": 0.43486868153622804, + "grad_norm": 5.6396050453186035, + "learning_rate": 1.7387173396674584e-06, + "loss": 0.0999, + "num_input_tokens_seen": 1621632, + "step": 3295 + }, + { + "epoch": 0.4355285733139765, + "grad_norm": 1.9114316701889038, + "learning_rate": 1.7413565584586962e-06, + "loss": 0.1682, + "num_input_tokens_seen": 1624064, + "step": 3300 + }, + { + "epoch": 0.43618846509172493, + "grad_norm": 46.63256072998047, + "learning_rate": 1.7439957772499338e-06, + "loss": 0.1445, + "num_input_tokens_seen": 1626496, + "step": 3305 + }, + { + "epoch": 0.43684835686947343, + "grad_norm": 20.691329956054688, + "learning_rate": 1.7466349960411718e-06, + "loss": 0.1393, + "num_input_tokens_seen": 1628800, + "step": 3310 + }, + { + "epoch": 0.4375082486472219, + "grad_norm": 100.15694427490234, + "learning_rate": 1.7492742148324095e-06, + "loss": 0.1456, + "num_input_tokens_seen": 1631232, + "step": 3315 + }, + { + "epoch": 0.4381681404249703, + "grad_norm": 0.0552111379802227, + "learning_rate": 1.7519134336236473e-06, + "loss": 0.0401, + "num_input_tokens_seen": 1633728, + "step": 3320 + }, + { + "epoch": 0.43882803220271877, + "grad_norm": 0.3552817702293396, + "learning_rate": 1.754552652414885e-06, + "loss": 0.1055, + "num_input_tokens_seen": 1636224, + "step": 3325 + }, + { + "epoch": 0.4394879239804672, + "grad_norm": 7.17879581451416, + "learning_rate": 1.7571918712061229e-06, + "loss": 0.0655, + "num_input_tokens_seen": 1638272, + "step": 3330 + }, + { + "epoch": 0.44014781575821565, + "grad_norm": 0.05285099148750305, + "learning_rate": 1.7598310899973608e-06, + "loss": 0.0947, + "num_input_tokens_seen": 1640512, + "step": 3335 + }, + { + "epoch": 0.4408077075359641, + "grad_norm": 0.15522630512714386, + "learning_rate": 1.7624703087885986e-06, + "loss": 0.0818, + "num_input_tokens_seen": 1642944, + "step": 3340 + }, + { + "epoch": 0.44146759931371254, + "grad_norm": 220.8441925048828, + "learning_rate": 1.7651095275798362e-06, + "loss": 0.0882, + "num_input_tokens_seen": 1645376, + "step": 3345 + }, + { + "epoch": 0.442127491091461, + "grad_norm": 0.0852712094783783, + "learning_rate": 1.7677487463710742e-06, + "loss": 0.0007, + "num_input_tokens_seen": 1647936, + "step": 3350 + }, + { + "epoch": 0.44278738286920943, + "grad_norm": 0.47491714358329773, + "learning_rate": 1.770387965162312e-06, + "loss": 0.1307, + "num_input_tokens_seen": 1650496, + "step": 3355 + }, + { + "epoch": 0.4434472746469579, + "grad_norm": 0.023417294025421143, + "learning_rate": 1.7730271839535497e-06, + "loss": 0.1083, + "num_input_tokens_seen": 1652992, + "step": 3360 + }, + { + "epoch": 0.4441071664247063, + "grad_norm": 0.10983436554670334, + "learning_rate": 1.7756664027447875e-06, + "loss": 0.0738, + "num_input_tokens_seen": 1655488, + "step": 3365 + }, + { + "epoch": 0.4447670582024548, + "grad_norm": 0.035877879709005356, + "learning_rate": 1.7783056215360253e-06, + "loss": 0.1521, + "num_input_tokens_seen": 1657984, + "step": 3370 + }, + { + "epoch": 0.44542694998020327, + "grad_norm": 0.28190505504608154, + "learning_rate": 1.780944840327263e-06, + "loss": 0.2652, + "num_input_tokens_seen": 1660736, + "step": 3375 + }, + { + "epoch": 0.4460868417579517, + "grad_norm": 17.0957088470459, + "learning_rate": 1.7835840591185008e-06, + "loss": 0.1661, + "num_input_tokens_seen": 1663104, + "step": 3380 + }, + { + "epoch": 0.44674673353570016, + "grad_norm": 95.27015686035156, + "learning_rate": 1.7862232779097386e-06, + "loss": 0.0768, + "num_input_tokens_seen": 1665344, + "step": 3385 + }, + { + "epoch": 0.4474066253134486, + "grad_norm": 41.594940185546875, + "learning_rate": 1.7888624967009763e-06, + "loss": 0.1197, + "num_input_tokens_seen": 1668096, + "step": 3390 + }, + { + "epoch": 0.44806651709119705, + "grad_norm": 24.987401962280273, + "learning_rate": 1.7915017154922143e-06, + "loss": 0.0718, + "num_input_tokens_seen": 1670272, + "step": 3395 + }, + { + "epoch": 0.4487264088689455, + "grad_norm": 1.6394327878952026, + "learning_rate": 1.794140934283452e-06, + "loss": 0.1287, + "num_input_tokens_seen": 1672448, + "step": 3400 + }, + { + "epoch": 0.44938630064669394, + "grad_norm": 165.666259765625, + "learning_rate": 1.7967801530746897e-06, + "loss": 0.0269, + "num_input_tokens_seen": 1675200, + "step": 3405 + }, + { + "epoch": 0.4500461924244424, + "grad_norm": 9.98177433013916, + "learning_rate": 1.7994193718659277e-06, + "loss": 0.1333, + "num_input_tokens_seen": 1677696, + "step": 3410 + }, + { + "epoch": 0.4507060842021908, + "grad_norm": 18.331186294555664, + "learning_rate": 1.8020585906571654e-06, + "loss": 0.0596, + "num_input_tokens_seen": 1680256, + "step": 3415 + }, + { + "epoch": 0.45136597597993927, + "grad_norm": 0.17333543300628662, + "learning_rate": 1.8046978094484032e-06, + "loss": 0.085, + "num_input_tokens_seen": 1682624, + "step": 3420 + }, + { + "epoch": 0.4520258677576877, + "grad_norm": 49.40113830566406, + "learning_rate": 1.807337028239641e-06, + "loss": 0.0495, + "num_input_tokens_seen": 1685056, + "step": 3425 + }, + { + "epoch": 0.45268575953543616, + "grad_norm": 242.10452270507812, + "learning_rate": 1.8099762470308787e-06, + "loss": 0.1929, + "num_input_tokens_seen": 1687168, + "step": 3430 + }, + { + "epoch": 0.45334565131318466, + "grad_norm": 2.2440037727355957, + "learning_rate": 1.8126154658221167e-06, + "loss": 0.277, + "num_input_tokens_seen": 1689216, + "step": 3435 + }, + { + "epoch": 0.4540055430909331, + "grad_norm": 0.2605953812599182, + "learning_rate": 1.8152546846133543e-06, + "loss": 0.0305, + "num_input_tokens_seen": 1691776, + "step": 3440 + }, + { + "epoch": 0.45466543486868155, + "grad_norm": 0.7526996731758118, + "learning_rate": 1.817893903404592e-06, + "loss": 0.0776, + "num_input_tokens_seen": 1694336, + "step": 3445 + }, + { + "epoch": 0.45532532664643, + "grad_norm": 34.56523132324219, + "learning_rate": 1.82053312219583e-06, + "loss": 0.2252, + "num_input_tokens_seen": 1696640, + "step": 3450 + }, + { + "epoch": 0.45598521842417844, + "grad_norm": 16.095619201660156, + "learning_rate": 1.8231723409870678e-06, + "loss": 0.0788, + "num_input_tokens_seen": 1698880, + "step": 3455 + }, + { + "epoch": 0.4566451102019269, + "grad_norm": 74.69313049316406, + "learning_rate": 1.8258115597783054e-06, + "loss": 0.2611, + "num_input_tokens_seen": 1701312, + "step": 3460 + }, + { + "epoch": 0.45730500197967533, + "grad_norm": 50.04304885864258, + "learning_rate": 1.8284507785695434e-06, + "loss": 0.2096, + "num_input_tokens_seen": 1703808, + "step": 3465 + }, + { + "epoch": 0.4579648937574238, + "grad_norm": 82.02398681640625, + "learning_rate": 1.8310899973607811e-06, + "loss": 0.0747, + "num_input_tokens_seen": 1706304, + "step": 3470 + }, + { + "epoch": 0.4586247855351722, + "grad_norm": 15.55357837677002, + "learning_rate": 1.8337292161520187e-06, + "loss": 0.3095, + "num_input_tokens_seen": 1708736, + "step": 3475 + }, + { + "epoch": 0.45928467731292066, + "grad_norm": 0.8830556273460388, + "learning_rate": 1.8363684349432567e-06, + "loss": 0.1241, + "num_input_tokens_seen": 1711232, + "step": 3480 + }, + { + "epoch": 0.4599445690906691, + "grad_norm": 0.3285176157951355, + "learning_rate": 1.8390076537344945e-06, + "loss": 0.0022, + "num_input_tokens_seen": 1713600, + "step": 3485 + }, + { + "epoch": 0.46060446086841755, + "grad_norm": 0.16952642798423767, + "learning_rate": 1.8416468725257325e-06, + "loss": 0.1762, + "num_input_tokens_seen": 1716224, + "step": 3490 + }, + { + "epoch": 0.46126435264616605, + "grad_norm": 62.19145965576172, + "learning_rate": 1.84428609131697e-06, + "loss": 0.2366, + "num_input_tokens_seen": 1718720, + "step": 3495 + }, + { + "epoch": 0.4619242444239145, + "grad_norm": 19.505321502685547, + "learning_rate": 1.8469253101082078e-06, + "loss": 0.2068, + "num_input_tokens_seen": 1721280, + "step": 3500 + }, + { + "epoch": 0.46258413620166294, + "grad_norm": 57.49911880493164, + "learning_rate": 1.8495645288994458e-06, + "loss": 0.0785, + "num_input_tokens_seen": 1723712, + "step": 3505 + }, + { + "epoch": 0.4632440279794114, + "grad_norm": 36.87958526611328, + "learning_rate": 1.8522037476906835e-06, + "loss": 0.1624, + "num_input_tokens_seen": 1725952, + "step": 3510 + }, + { + "epoch": 0.46390391975715983, + "grad_norm": 6.948802947998047, + "learning_rate": 1.8548429664819211e-06, + "loss": 0.0022, + "num_input_tokens_seen": 1728512, + "step": 3515 + }, + { + "epoch": 0.4645638115349083, + "grad_norm": 116.72127532958984, + "learning_rate": 1.857482185273159e-06, + "loss": 0.1997, + "num_input_tokens_seen": 1731008, + "step": 3520 + }, + { + "epoch": 0.4652237033126567, + "grad_norm": 36.398902893066406, + "learning_rate": 1.8601214040643969e-06, + "loss": 0.1817, + "num_input_tokens_seen": 1733696, + "step": 3525 + }, + { + "epoch": 0.46588359509040517, + "grad_norm": 158.40782165527344, + "learning_rate": 1.8627606228556346e-06, + "loss": 0.0807, + "num_input_tokens_seen": 1736256, + "step": 3530 + }, + { + "epoch": 0.4665434868681536, + "grad_norm": 56.720367431640625, + "learning_rate": 1.8653998416468724e-06, + "loss": 0.2405, + "num_input_tokens_seen": 1738944, + "step": 3535 + }, + { + "epoch": 0.46720337864590206, + "grad_norm": 19.342018127441406, + "learning_rate": 1.8680390604381102e-06, + "loss": 0.207, + "num_input_tokens_seen": 1741568, + "step": 3540 + }, + { + "epoch": 0.4678632704236505, + "grad_norm": 0.12003885954618454, + "learning_rate": 1.8706782792293482e-06, + "loss": 0.0024, + "num_input_tokens_seen": 1743808, + "step": 3545 + }, + { + "epoch": 0.46852316220139895, + "grad_norm": 0.5443273782730103, + "learning_rate": 1.873317498020586e-06, + "loss": 0.035, + "num_input_tokens_seen": 1746176, + "step": 3550 + }, + { + "epoch": 0.46918305397914745, + "grad_norm": 0.12307876348495483, + "learning_rate": 1.8759567168118235e-06, + "loss": 0.214, + "num_input_tokens_seen": 1748608, + "step": 3555 + }, + { + "epoch": 0.4698429457568959, + "grad_norm": 0.24467946588993073, + "learning_rate": 1.8785959356030615e-06, + "loss": 0.0983, + "num_input_tokens_seen": 1751040, + "step": 3560 + }, + { + "epoch": 0.47050283753464434, + "grad_norm": 2.263025999069214, + "learning_rate": 1.8812351543942993e-06, + "loss": 0.1472, + "num_input_tokens_seen": 1753280, + "step": 3565 + }, + { + "epoch": 0.4711627293123928, + "grad_norm": 14.852109909057617, + "learning_rate": 1.883874373185537e-06, + "loss": 0.4087, + "num_input_tokens_seen": 1755712, + "step": 3570 + }, + { + "epoch": 0.4718226210901412, + "grad_norm": 53.581016540527344, + "learning_rate": 1.8865135919767748e-06, + "loss": 0.1291, + "num_input_tokens_seen": 1757952, + "step": 3575 + }, + { + "epoch": 0.47248251286788967, + "grad_norm": 116.46224212646484, + "learning_rate": 1.8891528107680126e-06, + "loss": 0.0719, + "num_input_tokens_seen": 1760384, + "step": 3580 + }, + { + "epoch": 0.4731424046456381, + "grad_norm": 0.08298421651124954, + "learning_rate": 1.8917920295592504e-06, + "loss": 0.0011, + "num_input_tokens_seen": 1762944, + "step": 3585 + }, + { + "epoch": 0.47380229642338656, + "grad_norm": 29.53057861328125, + "learning_rate": 1.8944312483504881e-06, + "loss": 0.2399, + "num_input_tokens_seen": 1765504, + "step": 3590 + }, + { + "epoch": 0.474462188201135, + "grad_norm": 0.1717340499162674, + "learning_rate": 1.897070467141726e-06, + "loss": 0.0452, + "num_input_tokens_seen": 1768128, + "step": 3595 + }, + { + "epoch": 0.47512207997888345, + "grad_norm": 0.200901597738266, + "learning_rate": 1.8997096859329637e-06, + "loss": 0.1898, + "num_input_tokens_seen": 1770624, + "step": 3600 + }, + { + "epoch": 0.4757819717566319, + "grad_norm": 24.74864387512207, + "learning_rate": 1.9023489047242017e-06, + "loss": 0.329, + "num_input_tokens_seen": 1773248, + "step": 3605 + }, + { + "epoch": 0.47644186353438034, + "grad_norm": 22.671142578125, + "learning_rate": 1.9049881235154392e-06, + "loss": 0.0657, + "num_input_tokens_seen": 1775488, + "step": 3610 + }, + { + "epoch": 0.4771017553121288, + "grad_norm": 1.239869475364685, + "learning_rate": 1.907627342306677e-06, + "loss": 0.1438, + "num_input_tokens_seen": 1777792, + "step": 3615 + }, + { + "epoch": 0.4777616470898773, + "grad_norm": 1.57161283493042, + "learning_rate": 1.9102665610979148e-06, + "loss": 0.0654, + "num_input_tokens_seen": 1780352, + "step": 3620 + }, + { + "epoch": 0.47842153886762573, + "grad_norm": 0.7097579836845398, + "learning_rate": 1.9129057798891528e-06, + "loss": 0.2003, + "num_input_tokens_seen": 1783168, + "step": 3625 + }, + { + "epoch": 0.4790814306453742, + "grad_norm": 23.16379737854004, + "learning_rate": 1.9155449986803903e-06, + "loss": 0.1582, + "num_input_tokens_seen": 1785920, + "step": 3630 + }, + { + "epoch": 0.4797413224231226, + "grad_norm": 0.09995155036449432, + "learning_rate": 1.9181842174716283e-06, + "loss": 0.1929, + "num_input_tokens_seen": 1788416, + "step": 3635 + }, + { + "epoch": 0.48040121420087106, + "grad_norm": 0.15477493405342102, + "learning_rate": 1.9208234362628663e-06, + "loss": 0.0412, + "num_input_tokens_seen": 1790848, + "step": 3640 + }, + { + "epoch": 0.4810611059786195, + "grad_norm": 73.67361450195312, + "learning_rate": 1.923462655054104e-06, + "loss": 0.0349, + "num_input_tokens_seen": 1793280, + "step": 3645 + }, + { + "epoch": 0.48172099775636795, + "grad_norm": 0.04567524790763855, + "learning_rate": 1.9261018738453414e-06, + "loss": 0.209, + "num_input_tokens_seen": 1795648, + "step": 3650 + }, + { + "epoch": 0.4823808895341164, + "grad_norm": 30.12908363342285, + "learning_rate": 1.9287410926365794e-06, + "loss": 0.1709, + "num_input_tokens_seen": 1797952, + "step": 3655 + }, + { + "epoch": 0.48304078131186484, + "grad_norm": 53.03123092651367, + "learning_rate": 1.9313803114278174e-06, + "loss": 0.099, + "num_input_tokens_seen": 1800192, + "step": 3660 + }, + { + "epoch": 0.4837006730896133, + "grad_norm": 0.17931802570819855, + "learning_rate": 1.9340195302190554e-06, + "loss": 0.1852, + "num_input_tokens_seen": 1802560, + "step": 3665 + }, + { + "epoch": 0.48436056486736173, + "grad_norm": 77.04347229003906, + "learning_rate": 1.936658749010293e-06, + "loss": 0.2034, + "num_input_tokens_seen": 1805056, + "step": 3670 + }, + { + "epoch": 0.4850204566451102, + "grad_norm": 16.242393493652344, + "learning_rate": 1.9392979678015305e-06, + "loss": 0.2424, + "num_input_tokens_seen": 1807744, + "step": 3675 + }, + { + "epoch": 0.4856803484228587, + "grad_norm": 39.54520797729492, + "learning_rate": 1.9419371865927685e-06, + "loss": 0.1529, + "num_input_tokens_seen": 1810368, + "step": 3680 + }, + { + "epoch": 0.4863402402006071, + "grad_norm": 0.6363928914070129, + "learning_rate": 1.944576405384006e-06, + "loss": 0.0411, + "num_input_tokens_seen": 1813056, + "step": 3685 + }, + { + "epoch": 0.48700013197835557, + "grad_norm": 0.6807913184165955, + "learning_rate": 1.947215624175244e-06, + "loss": 0.2287, + "num_input_tokens_seen": 1815360, + "step": 3690 + }, + { + "epoch": 0.487660023756104, + "grad_norm": 16.44377326965332, + "learning_rate": 1.949854842966482e-06, + "loss": 0.1827, + "num_input_tokens_seen": 1817920, + "step": 3695 + }, + { + "epoch": 0.48831991553385246, + "grad_norm": 15.65255355834961, + "learning_rate": 1.9524940617577196e-06, + "loss": 0.0576, + "num_input_tokens_seen": 1820288, + "step": 3700 + }, + { + "epoch": 0.4889798073116009, + "grad_norm": 0.26828962564468384, + "learning_rate": 1.955133280548957e-06, + "loss": 0.1, + "num_input_tokens_seen": 1822656, + "step": 3705 + }, + { + "epoch": 0.48963969908934935, + "grad_norm": 18.101781845092773, + "learning_rate": 1.957772499340195e-06, + "loss": 0.1911, + "num_input_tokens_seen": 1824832, + "step": 3710 + }, + { + "epoch": 0.4902995908670978, + "grad_norm": 138.92214965820312, + "learning_rate": 1.960411718131433e-06, + "loss": 0.1059, + "num_input_tokens_seen": 1827200, + "step": 3715 + }, + { + "epoch": 0.49095948264484623, + "grad_norm": 17.669477462768555, + "learning_rate": 1.963050936922671e-06, + "loss": 0.1355, + "num_input_tokens_seen": 1829632, + "step": 3720 + }, + { + "epoch": 0.4916193744225947, + "grad_norm": 35.92660140991211, + "learning_rate": 1.9656901557139086e-06, + "loss": 0.0601, + "num_input_tokens_seen": 1832192, + "step": 3725 + }, + { + "epoch": 0.4922792662003431, + "grad_norm": 90.45658111572266, + "learning_rate": 1.968329374505146e-06, + "loss": 0.1779, + "num_input_tokens_seen": 1834752, + "step": 3730 + }, + { + "epoch": 0.49293915797809157, + "grad_norm": 7.398950099945068, + "learning_rate": 1.970968593296384e-06, + "loss": 0.0968, + "num_input_tokens_seen": 1837248, + "step": 3735 + }, + { + "epoch": 0.49359904975584007, + "grad_norm": 62.705711364746094, + "learning_rate": 1.973607812087622e-06, + "loss": 0.2923, + "num_input_tokens_seen": 1839680, + "step": 3740 + }, + { + "epoch": 0.4942589415335885, + "grad_norm": 0.30728596448898315, + "learning_rate": 1.9762470308788597e-06, + "loss": 0.115, + "num_input_tokens_seen": 1842304, + "step": 3745 + }, + { + "epoch": 0.49491883331133696, + "grad_norm": 1.9036290645599365, + "learning_rate": 1.9788862496700977e-06, + "loss": 0.0656, + "num_input_tokens_seen": 1844480, + "step": 3750 + }, + { + "epoch": 0.4955787250890854, + "grad_norm": 0.364557683467865, + "learning_rate": 1.9815254684613353e-06, + "loss": 0.0883, + "num_input_tokens_seen": 1846912, + "step": 3755 + }, + { + "epoch": 0.49623861686683385, + "grad_norm": 14.800479888916016, + "learning_rate": 1.9841646872525733e-06, + "loss": 0.1242, + "num_input_tokens_seen": 1849024, + "step": 3760 + }, + { + "epoch": 0.4968985086445823, + "grad_norm": 0.4379720389842987, + "learning_rate": 1.986803906043811e-06, + "loss": 0.0576, + "num_input_tokens_seen": 1851712, + "step": 3765 + }, + { + "epoch": 0.49755840042233074, + "grad_norm": 74.74554443359375, + "learning_rate": 1.989443124835049e-06, + "loss": 0.1076, + "num_input_tokens_seen": 1854208, + "step": 3770 + }, + { + "epoch": 0.4982182922000792, + "grad_norm": 0.05924411118030548, + "learning_rate": 1.9920823436262864e-06, + "loss": 0.0448, + "num_input_tokens_seen": 1856704, + "step": 3775 + }, + { + "epoch": 0.4988781839778276, + "grad_norm": 200.05575561523438, + "learning_rate": 1.9947215624175244e-06, + "loss": 0.1233, + "num_input_tokens_seen": 1858944, + "step": 3780 + }, + { + "epoch": 0.49953807575557607, + "grad_norm": 0.9478186964988708, + "learning_rate": 1.997360781208762e-06, + "loss": 0.1662, + "num_input_tokens_seen": 1861696, + "step": 3785 + }, + { + "epoch": 0.5001979675333246, + "grad_norm": 0.37777480483055115, + "learning_rate": 2e-06, + "loss": 0.3196, + "num_input_tokens_seen": 1864128, + "step": 3790 + }, + { + "epoch": 0.5001979675333246, + "eval_loss": 0.15765729546546936, + "eval_runtime": 7.8114, + "eval_samples_per_second": 862.204, + "eval_steps_per_second": 107.792, + "num_input_tokens_seen": 1864128, + "step": 3790 + }, + { + "epoch": 0.500857859311073, + "grad_norm": 0.1402450054883957, + "learning_rate": 1.9999998938786208e-06, + "loss": 0.0058, + "num_input_tokens_seen": 1866432, + "step": 3795 + }, + { + "epoch": 0.5015177510888215, + "grad_norm": 16.13665008544922, + "learning_rate": 1.9999995755145053e-06, + "loss": 0.0722, + "num_input_tokens_seen": 1868736, + "step": 3800 + }, + { + "epoch": 0.5021776428665699, + "grad_norm": 14.748276710510254, + "learning_rate": 1.9999990449077214e-06, + "loss": 0.1512, + "num_input_tokens_seen": 1871360, + "step": 3805 + }, + { + "epoch": 0.5028375346443184, + "grad_norm": 0.05216236785054207, + "learning_rate": 1.999998302058382e-06, + "loss": 0.0042, + "num_input_tokens_seen": 1873536, + "step": 3810 + }, + { + "epoch": 0.5034974264220667, + "grad_norm": 123.01942443847656, + "learning_rate": 1.999997346966644e-06, + "loss": 0.2111, + "num_input_tokens_seen": 1875904, + "step": 3815 + }, + { + "epoch": 0.5041573181998152, + "grad_norm": 34.44160461425781, + "learning_rate": 1.999996179632711e-06, + "loss": 0.2222, + "num_input_tokens_seen": 1878464, + "step": 3820 + }, + { + "epoch": 0.5048172099775636, + "grad_norm": 115.07276153564453, + "learning_rate": 1.9999948000568297e-06, + "loss": 0.1967, + "num_input_tokens_seen": 1880640, + "step": 3825 + }, + { + "epoch": 0.5054771017553121, + "grad_norm": 106.39981079101562, + "learning_rate": 1.9999932082392934e-06, + "loss": 0.1649, + "num_input_tokens_seen": 1882944, + "step": 3830 + }, + { + "epoch": 0.5061369935330606, + "grad_norm": 1.3459759950637817, + "learning_rate": 1.9999914041804405e-06, + "loss": 0.064, + "num_input_tokens_seen": 1885248, + "step": 3835 + }, + { + "epoch": 0.506796885310809, + "grad_norm": 21.34935188293457, + "learning_rate": 1.9999893878806534e-06, + "loss": 0.1077, + "num_input_tokens_seen": 1887872, + "step": 3840 + }, + { + "epoch": 0.5074567770885575, + "grad_norm": 185.3477020263672, + "learning_rate": 1.99998715934036e-06, + "loss": 0.0414, + "num_input_tokens_seen": 1890560, + "step": 3845 + }, + { + "epoch": 0.5081166688663059, + "grad_norm": 231.92300415039062, + "learning_rate": 1.999984718560033e-06, + "loss": 0.1999, + "num_input_tokens_seen": 1893056, + "step": 3850 + }, + { + "epoch": 0.5087765606440544, + "grad_norm": 0.08792197704315186, + "learning_rate": 1.9999820655401914e-06, + "loss": 0.1416, + "num_input_tokens_seen": 1895360, + "step": 3855 + }, + { + "epoch": 0.5094364524218028, + "grad_norm": 0.09239798784255981, + "learning_rate": 1.9999792002813973e-06, + "loss": 0.0954, + "num_input_tokens_seen": 1897664, + "step": 3860 + }, + { + "epoch": 0.5100963441995513, + "grad_norm": 50.75433349609375, + "learning_rate": 1.9999761227842592e-06, + "loss": 0.27, + "num_input_tokens_seen": 1900288, + "step": 3865 + }, + { + "epoch": 0.5107562359772997, + "grad_norm": 0.144536554813385, + "learning_rate": 1.9999728330494307e-06, + "loss": 0.0797, + "num_input_tokens_seen": 1903040, + "step": 3870 + }, + { + "epoch": 0.5114161277550482, + "grad_norm": 18.808856964111328, + "learning_rate": 1.9999693310776095e-06, + "loss": 0.1566, + "num_input_tokens_seen": 1905472, + "step": 3875 + }, + { + "epoch": 0.5120760195327966, + "grad_norm": 0.6288896203041077, + "learning_rate": 1.9999656168695387e-06, + "loss": 0.0922, + "num_input_tokens_seen": 1907712, + "step": 3880 + }, + { + "epoch": 0.5127359113105451, + "grad_norm": 0.032050516456365585, + "learning_rate": 1.9999616904260072e-06, + "loss": 0.001, + "num_input_tokens_seen": 1910080, + "step": 3885 + }, + { + "epoch": 0.5133958030882935, + "grad_norm": 38.96467971801758, + "learning_rate": 1.9999575517478477e-06, + "loss": 0.2619, + "num_input_tokens_seen": 1912512, + "step": 3890 + }, + { + "epoch": 0.514055694866042, + "grad_norm": 0.0541483499109745, + "learning_rate": 1.9999532008359393e-06, + "loss": 0.1074, + "num_input_tokens_seen": 1914752, + "step": 3895 + }, + { + "epoch": 0.5147155866437905, + "grad_norm": 0.31871920824050903, + "learning_rate": 1.999948637691205e-06, + "loss": 0.002, + "num_input_tokens_seen": 1917120, + "step": 3900 + }, + { + "epoch": 0.5153754784215389, + "grad_norm": 118.97882080078125, + "learning_rate": 1.9999438623146132e-06, + "loss": 0.0127, + "num_input_tokens_seen": 1919232, + "step": 3905 + }, + { + "epoch": 0.5160353701992874, + "grad_norm": 0.19287319481372833, + "learning_rate": 1.999938874707178e-06, + "loss": 0.0941, + "num_input_tokens_seen": 1921856, + "step": 3910 + }, + { + "epoch": 0.5166952619770357, + "grad_norm": 40.96875762939453, + "learning_rate": 1.9999336748699576e-06, + "loss": 0.1961, + "num_input_tokens_seen": 1924224, + "step": 3915 + }, + { + "epoch": 0.5173551537547842, + "grad_norm": 0.026069777086377144, + "learning_rate": 1.9999282628040553e-06, + "loss": 0.0313, + "num_input_tokens_seen": 1926464, + "step": 3920 + }, + { + "epoch": 0.5180150455325326, + "grad_norm": 0.15689440071582794, + "learning_rate": 1.9999226385106205e-06, + "loss": 0.3644, + "num_input_tokens_seen": 1928896, + "step": 3925 + }, + { + "epoch": 0.5186749373102811, + "grad_norm": 70.96661376953125, + "learning_rate": 1.9999168019908464e-06, + "loss": 0.1085, + "num_input_tokens_seen": 1931200, + "step": 3930 + }, + { + "epoch": 0.5193348290880295, + "grad_norm": 0.10924938321113586, + "learning_rate": 1.9999107532459716e-06, + "loss": 0.1389, + "num_input_tokens_seen": 1933632, + "step": 3935 + }, + { + "epoch": 0.519994720865778, + "grad_norm": 0.22806203365325928, + "learning_rate": 1.9999044922772808e-06, + "loss": 0.0024, + "num_input_tokens_seen": 1935872, + "step": 3940 + }, + { + "epoch": 0.5206546126435264, + "grad_norm": 0.1909378170967102, + "learning_rate": 1.999898019086102e-06, + "loss": 0.0619, + "num_input_tokens_seen": 1938048, + "step": 3945 + }, + { + "epoch": 0.5213145044212749, + "grad_norm": 97.45793151855469, + "learning_rate": 1.999891333673809e-06, + "loss": 0.0606, + "num_input_tokens_seen": 1940608, + "step": 3950 + }, + { + "epoch": 0.5219743961990233, + "grad_norm": 131.9565887451172, + "learning_rate": 1.999884436041822e-06, + "loss": 0.3859, + "num_input_tokens_seen": 1943040, + "step": 3955 + }, + { + "epoch": 0.5226342879767718, + "grad_norm": 0.3030645251274109, + "learning_rate": 1.999877326191603e-06, + "loss": 0.1834, + "num_input_tokens_seen": 1945664, + "step": 3960 + }, + { + "epoch": 0.5232941797545203, + "grad_norm": 0.06720297783613205, + "learning_rate": 1.9998700041246626e-06, + "loss": 0.0053, + "num_input_tokens_seen": 1948032, + "step": 3965 + }, + { + "epoch": 0.5239540715322687, + "grad_norm": 0.10676462203264236, + "learning_rate": 1.9998624698425545e-06, + "loss": 0.0008, + "num_input_tokens_seen": 1950208, + "step": 3970 + }, + { + "epoch": 0.5246139633100172, + "grad_norm": 0.34879347681999207, + "learning_rate": 1.999854723346877e-06, + "loss": 0.1693, + "num_input_tokens_seen": 1952896, + "step": 3975 + }, + { + "epoch": 0.5252738550877656, + "grad_norm": 0.12043995410203934, + "learning_rate": 1.999846764639275e-06, + "loss": 0.0881, + "num_input_tokens_seen": 1955200, + "step": 3980 + }, + { + "epoch": 0.5259337468655141, + "grad_norm": 0.03420906886458397, + "learning_rate": 1.999838593721438e-06, + "loss": 0.0753, + "num_input_tokens_seen": 1957376, + "step": 3985 + }, + { + "epoch": 0.5265936386432625, + "grad_norm": 0.09042178839445114, + "learning_rate": 1.999830210595099e-06, + "loss": 0.2321, + "num_input_tokens_seen": 1960064, + "step": 3990 + }, + { + "epoch": 0.527253530421011, + "grad_norm": 0.13190309703350067, + "learning_rate": 1.999821615262039e-06, + "loss": 0.1365, + "num_input_tokens_seen": 1962624, + "step": 3995 + }, + { + "epoch": 0.5279134221987594, + "grad_norm": 0.33515259623527527, + "learning_rate": 1.9998128077240805e-06, + "loss": 0.0799, + "num_input_tokens_seen": 1965056, + "step": 4000 + }, + { + "epoch": 0.5285733139765079, + "grad_norm": 1.0748093128204346, + "learning_rate": 1.9998037879830937e-06, + "loss": 0.137, + "num_input_tokens_seen": 1967424, + "step": 4005 + }, + { + "epoch": 0.5292332057542563, + "grad_norm": 89.96631622314453, + "learning_rate": 1.999794556040993e-06, + "loss": 0.2433, + "num_input_tokens_seen": 1969856, + "step": 4010 + }, + { + "epoch": 0.5298930975320048, + "grad_norm": 0.29487359523773193, + "learning_rate": 1.999785111899738e-06, + "loss": 0.0609, + "num_input_tokens_seen": 1972544, + "step": 4015 + }, + { + "epoch": 0.5305529893097533, + "grad_norm": 0.09903884679079056, + "learning_rate": 1.9997754555613324e-06, + "loss": 0.0675, + "num_input_tokens_seen": 1974912, + "step": 4020 + }, + { + "epoch": 0.5312128810875016, + "grad_norm": 21.907075881958008, + "learning_rate": 1.999765587027827e-06, + "loss": 0.2871, + "num_input_tokens_seen": 1977216, + "step": 4025 + }, + { + "epoch": 0.5318727728652501, + "grad_norm": 30.0252742767334, + "learning_rate": 1.9997555063013147e-06, + "loss": 0.2346, + "num_input_tokens_seen": 1979648, + "step": 4030 + }, + { + "epoch": 0.5325326646429985, + "grad_norm": 84.9073257446289, + "learning_rate": 1.999745213383936e-06, + "loss": 0.013, + "num_input_tokens_seen": 1981888, + "step": 4035 + }, + { + "epoch": 0.533192556420747, + "grad_norm": 86.11760711669922, + "learning_rate": 1.9997347082778753e-06, + "loss": 0.1859, + "num_input_tokens_seen": 1984448, + "step": 4040 + }, + { + "epoch": 0.5338524481984954, + "grad_norm": 45.54768753051758, + "learning_rate": 1.999723990985363e-06, + "loss": 0.3186, + "num_input_tokens_seen": 1987072, + "step": 4045 + }, + { + "epoch": 0.5345123399762439, + "grad_norm": 0.8239102363586426, + "learning_rate": 1.999713061508672e-06, + "loss": 0.1181, + "num_input_tokens_seen": 1989632, + "step": 4050 + }, + { + "epoch": 0.5351722317539923, + "grad_norm": 0.6783844232559204, + "learning_rate": 1.9997019198501233e-06, + "loss": 0.0538, + "num_input_tokens_seen": 1992192, + "step": 4055 + }, + { + "epoch": 0.5358321235317408, + "grad_norm": 33.974369049072266, + "learning_rate": 1.999690566012082e-06, + "loss": 0.265, + "num_input_tokens_seen": 1994624, + "step": 4060 + }, + { + "epoch": 0.5364920153094892, + "grad_norm": 17.405261993408203, + "learning_rate": 1.9996789999969568e-06, + "loss": 0.0607, + "num_input_tokens_seen": 1997056, + "step": 4065 + }, + { + "epoch": 0.5371519070872377, + "grad_norm": 2.6141507625579834, + "learning_rate": 1.999667221807203e-06, + "loss": 0.134, + "num_input_tokens_seen": 1999360, + "step": 4070 + }, + { + "epoch": 0.5378117988649861, + "grad_norm": 58.6915397644043, + "learning_rate": 1.9996552314453204e-06, + "loss": 0.082, + "num_input_tokens_seen": 2001856, + "step": 4075 + }, + { + "epoch": 0.5384716906427346, + "grad_norm": 51.96027755737305, + "learning_rate": 1.999643028913854e-06, + "loss": 0.1956, + "num_input_tokens_seen": 2004288, + "step": 4080 + }, + { + "epoch": 0.5391315824204831, + "grad_norm": 80.61639404296875, + "learning_rate": 1.9996306142153935e-06, + "loss": 0.1135, + "num_input_tokens_seen": 2006976, + "step": 4085 + }, + { + "epoch": 0.5397914741982315, + "grad_norm": 4.0380539894104, + "learning_rate": 1.9996179873525737e-06, + "loss": 0.0367, + "num_input_tokens_seen": 2009280, + "step": 4090 + }, + { + "epoch": 0.54045136597598, + "grad_norm": 189.69859313964844, + "learning_rate": 1.9996051483280744e-06, + "loss": 0.2253, + "num_input_tokens_seen": 2011776, + "step": 4095 + }, + { + "epoch": 0.5411112577537284, + "grad_norm": 47.396934509277344, + "learning_rate": 1.9995920971446215e-06, + "loss": 0.4021, + "num_input_tokens_seen": 2014336, + "step": 4100 + }, + { + "epoch": 0.5417711495314769, + "grad_norm": 31.230287551879883, + "learning_rate": 1.9995788338049846e-06, + "loss": 0.1495, + "num_input_tokens_seen": 2017152, + "step": 4105 + }, + { + "epoch": 0.5424310413092253, + "grad_norm": 26.340003967285156, + "learning_rate": 1.999565358311978e-06, + "loss": 0.1161, + "num_input_tokens_seen": 2019520, + "step": 4110 + }, + { + "epoch": 0.5430909330869738, + "grad_norm": 0.42596912384033203, + "learning_rate": 1.999551670668463e-06, + "loss": 0.1655, + "num_input_tokens_seen": 2021632, + "step": 4115 + }, + { + "epoch": 0.5437508248647221, + "grad_norm": 41.248680114746094, + "learning_rate": 1.9995377708773437e-06, + "loss": 0.1365, + "num_input_tokens_seen": 2023744, + "step": 4120 + }, + { + "epoch": 0.5444107166424706, + "grad_norm": 0.6861774921417236, + "learning_rate": 1.999523658941571e-06, + "loss": 0.0072, + "num_input_tokens_seen": 2026048, + "step": 4125 + }, + { + "epoch": 0.545070608420219, + "grad_norm": 0.05741658806800842, + "learning_rate": 1.999509334864139e-06, + "loss": 0.0905, + "num_input_tokens_seen": 2028352, + "step": 4130 + }, + { + "epoch": 0.5457305001979675, + "grad_norm": 0.12205217778682709, + "learning_rate": 1.999494798648089e-06, + "loss": 0.0448, + "num_input_tokens_seen": 2030848, + "step": 4135 + }, + { + "epoch": 0.5463903919757159, + "grad_norm": 0.046685557812452316, + "learning_rate": 1.9994800502965055e-06, + "loss": 0.1427, + "num_input_tokens_seen": 2033344, + "step": 4140 + }, + { + "epoch": 0.5470502837534644, + "grad_norm": 1.0870046615600586, + "learning_rate": 1.9994650898125193e-06, + "loss": 0.0661, + "num_input_tokens_seen": 2035584, + "step": 4145 + }, + { + "epoch": 0.5477101755312129, + "grad_norm": 23.627378463745117, + "learning_rate": 1.9994499171993056e-06, + "loss": 0.0643, + "num_input_tokens_seen": 2038272, + "step": 4150 + }, + { + "epoch": 0.5483700673089613, + "grad_norm": 19.256277084350586, + "learning_rate": 1.999434532460084e-06, + "loss": 0.1879, + "num_input_tokens_seen": 2040768, + "step": 4155 + }, + { + "epoch": 0.5490299590867098, + "grad_norm": 27.206546783447266, + "learning_rate": 1.99941893559812e-06, + "loss": 0.1577, + "num_input_tokens_seen": 2043072, + "step": 4160 + }, + { + "epoch": 0.5496898508644582, + "grad_norm": 14.51369571685791, + "learning_rate": 1.9994031266167247e-06, + "loss": 0.1324, + "num_input_tokens_seen": 2045824, + "step": 4165 + }, + { + "epoch": 0.5503497426422067, + "grad_norm": 26.757734298706055, + "learning_rate": 1.999387105519253e-06, + "loss": 0.366, + "num_input_tokens_seen": 2048064, + "step": 4170 + }, + { + "epoch": 0.5510096344199551, + "grad_norm": 0.8686099648475647, + "learning_rate": 1.9993708723091044e-06, + "loss": 0.0539, + "num_input_tokens_seen": 2050432, + "step": 4175 + }, + { + "epoch": 0.5516695261977036, + "grad_norm": 1.9059375524520874, + "learning_rate": 1.9993544269897253e-06, + "loss": 0.0967, + "num_input_tokens_seen": 2052928, + "step": 4180 + }, + { + "epoch": 0.552329417975452, + "grad_norm": 1.612260341644287, + "learning_rate": 1.999337769564606e-06, + "loss": 0.1474, + "num_input_tokens_seen": 2055424, + "step": 4185 + }, + { + "epoch": 0.5529893097532005, + "grad_norm": 1.3080824613571167, + "learning_rate": 1.9993209000372814e-06, + "loss": 0.121, + "num_input_tokens_seen": 2057536, + "step": 4190 + }, + { + "epoch": 0.5536492015309489, + "grad_norm": 45.95222854614258, + "learning_rate": 1.9993038184113325e-06, + "loss": 0.2545, + "num_input_tokens_seen": 2059840, + "step": 4195 + }, + { + "epoch": 0.5543090933086974, + "grad_norm": 35.62221908569336, + "learning_rate": 1.999286524690385e-06, + "loss": 0.2414, + "num_input_tokens_seen": 2062656, + "step": 4200 + }, + { + "epoch": 0.5549689850864459, + "grad_norm": 1.8063488006591797, + "learning_rate": 1.999269018878108e-06, + "loss": 0.2045, + "num_input_tokens_seen": 2064960, + "step": 4205 + }, + { + "epoch": 0.5556288768641943, + "grad_norm": 18.830642700195312, + "learning_rate": 1.999251300978219e-06, + "loss": 0.233, + "num_input_tokens_seen": 2067008, + "step": 4210 + }, + { + "epoch": 0.5562887686419428, + "grad_norm": 0.42927706241607666, + "learning_rate": 1.9992333709944764e-06, + "loss": 0.0333, + "num_input_tokens_seen": 2069696, + "step": 4215 + }, + { + "epoch": 0.5569486604196912, + "grad_norm": 10.923612594604492, + "learning_rate": 1.9992152289306872e-06, + "loss": 0.0229, + "num_input_tokens_seen": 2072320, + "step": 4220 + }, + { + "epoch": 0.5576085521974397, + "grad_norm": 0.6108276844024658, + "learning_rate": 1.999196874790701e-06, + "loss": 0.005, + "num_input_tokens_seen": 2074752, + "step": 4225 + }, + { + "epoch": 0.558268443975188, + "grad_norm": 13.711141586303711, + "learning_rate": 1.999178308578414e-06, + "loss": 0.1422, + "num_input_tokens_seen": 2077440, + "step": 4230 + }, + { + "epoch": 0.5589283357529365, + "grad_norm": 70.51332092285156, + "learning_rate": 1.9991595302977666e-06, + "loss": 0.2342, + "num_input_tokens_seen": 2080000, + "step": 4235 + }, + { + "epoch": 0.5595882275306849, + "grad_norm": 30.383102416992188, + "learning_rate": 1.9991405399527438e-06, + "loss": 0.196, + "num_input_tokens_seen": 2082560, + "step": 4240 + }, + { + "epoch": 0.5602481193084334, + "grad_norm": 18.101728439331055, + "learning_rate": 1.999121337547377e-06, + "loss": 0.1375, + "num_input_tokens_seen": 2084864, + "step": 4245 + }, + { + "epoch": 0.5609080110861818, + "grad_norm": 0.16135449707508087, + "learning_rate": 1.9991019230857413e-06, + "loss": 0.097, + "num_input_tokens_seen": 2087424, + "step": 4250 + }, + { + "epoch": 0.5615679028639303, + "grad_norm": 6.275109767913818, + "learning_rate": 1.999082296571957e-06, + "loss": 0.1572, + "num_input_tokens_seen": 2090048, + "step": 4255 + }, + { + "epoch": 0.5622277946416787, + "grad_norm": 0.1460818648338318, + "learning_rate": 1.9990624580101907e-06, + "loss": 0.0845, + "num_input_tokens_seen": 2092416, + "step": 4260 + }, + { + "epoch": 0.5628876864194272, + "grad_norm": 2.7357776165008545, + "learning_rate": 1.999042407404652e-06, + "loss": 0.0568, + "num_input_tokens_seen": 2094656, + "step": 4265 + }, + { + "epoch": 0.5635475781971757, + "grad_norm": 322.82025146484375, + "learning_rate": 1.999022144759597e-06, + "loss": 0.0462, + "num_input_tokens_seen": 2097024, + "step": 4270 + }, + { + "epoch": 0.5642074699749241, + "grad_norm": 3.075206995010376, + "learning_rate": 1.9990016700793257e-06, + "loss": 0.0643, + "num_input_tokens_seen": 2099392, + "step": 4275 + }, + { + "epoch": 0.5648673617526726, + "grad_norm": 1.3678243160247803, + "learning_rate": 1.9989809833681845e-06, + "loss": 0.0911, + "num_input_tokens_seen": 2102016, + "step": 4280 + }, + { + "epoch": 0.565527253530421, + "grad_norm": 34.296119689941406, + "learning_rate": 1.9989600846305634e-06, + "loss": 0.2019, + "num_input_tokens_seen": 2104320, + "step": 4285 + }, + { + "epoch": 0.5661871453081695, + "grad_norm": 43.207645416259766, + "learning_rate": 1.9989389738708984e-06, + "loss": 0.1077, + "num_input_tokens_seen": 2107136, + "step": 4290 + }, + { + "epoch": 0.5668470370859179, + "grad_norm": 1.0292057991027832, + "learning_rate": 1.9989176510936698e-06, + "loss": 0.0653, + "num_input_tokens_seen": 2109888, + "step": 4295 + }, + { + "epoch": 0.5675069288636664, + "grad_norm": 17.092741012573242, + "learning_rate": 1.9988961163034033e-06, + "loss": 0.1031, + "num_input_tokens_seen": 2112192, + "step": 4300 + }, + { + "epoch": 0.5681668206414148, + "grad_norm": 0.08173481374979019, + "learning_rate": 1.9988743695046696e-06, + "loss": 0.1154, + "num_input_tokens_seen": 2114752, + "step": 4305 + }, + { + "epoch": 0.5688267124191633, + "grad_norm": 0.012852237559854984, + "learning_rate": 1.9988524107020844e-06, + "loss": 0.0766, + "num_input_tokens_seen": 2117184, + "step": 4310 + }, + { + "epoch": 0.5694866041969117, + "grad_norm": 0.25723448395729065, + "learning_rate": 1.9988302399003083e-06, + "loss": 0.1522, + "num_input_tokens_seen": 2119552, + "step": 4315 + }, + { + "epoch": 0.5701464959746602, + "grad_norm": 138.39862060546875, + "learning_rate": 1.9988078571040464e-06, + "loss": 0.1321, + "num_input_tokens_seen": 2121920, + "step": 4320 + }, + { + "epoch": 0.5708063877524086, + "grad_norm": 12.506170272827148, + "learning_rate": 1.99878526231805e-06, + "loss": 0.2248, + "num_input_tokens_seen": 2124096, + "step": 4325 + }, + { + "epoch": 0.571466279530157, + "grad_norm": 27.999643325805664, + "learning_rate": 1.998762455547114e-06, + "loss": 0.1275, + "num_input_tokens_seen": 2126528, + "step": 4330 + }, + { + "epoch": 0.5721261713079056, + "grad_norm": 13.254135131835938, + "learning_rate": 1.998739436796079e-06, + "loss": 0.0773, + "num_input_tokens_seen": 2128576, + "step": 4335 + }, + { + "epoch": 0.5727860630856539, + "grad_norm": 190.8338165283203, + "learning_rate": 1.9987162060698312e-06, + "loss": 0.1947, + "num_input_tokens_seen": 2130688, + "step": 4340 + }, + { + "epoch": 0.5734459548634024, + "grad_norm": 15.912854194641113, + "learning_rate": 1.9986927633733007e-06, + "loss": 0.2038, + "num_input_tokens_seen": 2133312, + "step": 4345 + }, + { + "epoch": 0.5741058466411508, + "grad_norm": 0.5080829858779907, + "learning_rate": 1.9986691087114634e-06, + "loss": 0.1005, + "num_input_tokens_seen": 2135680, + "step": 4350 + }, + { + "epoch": 0.5747657384188993, + "grad_norm": 0.27579280734062195, + "learning_rate": 1.9986452420893393e-06, + "loss": 0.1931, + "num_input_tokens_seen": 2138112, + "step": 4355 + }, + { + "epoch": 0.5754256301966477, + "grad_norm": 25.686216354370117, + "learning_rate": 1.998621163511994e-06, + "loss": 0.3394, + "num_input_tokens_seen": 2140352, + "step": 4360 + }, + { + "epoch": 0.5760855219743962, + "grad_norm": 0.8148085474967957, + "learning_rate": 1.998596872984539e-06, + "loss": 0.1082, + "num_input_tokens_seen": 2143040, + "step": 4365 + }, + { + "epoch": 0.5767454137521446, + "grad_norm": 0.7620784044265747, + "learning_rate": 1.998572370512128e-06, + "loss": 0.0335, + "num_input_tokens_seen": 2145280, + "step": 4370 + }, + { + "epoch": 0.5774053055298931, + "grad_norm": 1.3707529306411743, + "learning_rate": 1.998547656099963e-06, + "loss": 0.0719, + "num_input_tokens_seen": 2147904, + "step": 4375 + }, + { + "epoch": 0.5780651973076415, + "grad_norm": 0.11669503152370453, + "learning_rate": 1.9985227297532886e-06, + "loss": 0.0904, + "num_input_tokens_seen": 2150400, + "step": 4380 + }, + { + "epoch": 0.57872508908539, + "grad_norm": 1.8291724920272827, + "learning_rate": 1.9984975914773957e-06, + "loss": 0.1622, + "num_input_tokens_seen": 2153088, + "step": 4385 + }, + { + "epoch": 0.5793849808631385, + "grad_norm": 0.11948619037866592, + "learning_rate": 1.9984722412776197e-06, + "loss": 0.0055, + "num_input_tokens_seen": 2155776, + "step": 4390 + }, + { + "epoch": 0.5800448726408869, + "grad_norm": 58.45433044433594, + "learning_rate": 1.9984466791593407e-06, + "loss": 0.2532, + "num_input_tokens_seen": 2158400, + "step": 4395 + }, + { + "epoch": 0.5807047644186354, + "grad_norm": 0.1013028621673584, + "learning_rate": 1.9984209051279843e-06, + "loss": 0.0378, + "num_input_tokens_seen": 2160704, + "step": 4400 + }, + { + "epoch": 0.5813646561963838, + "grad_norm": 53.731590270996094, + "learning_rate": 1.998394919189021e-06, + "loss": 0.0141, + "num_input_tokens_seen": 2163200, + "step": 4405 + }, + { + "epoch": 0.5820245479741323, + "grad_norm": 87.15843200683594, + "learning_rate": 1.9983687213479655e-06, + "loss": 0.137, + "num_input_tokens_seen": 2165376, + "step": 4410 + }, + { + "epoch": 0.5826844397518807, + "grad_norm": 33.592220306396484, + "learning_rate": 1.998342311610379e-06, + "loss": 0.2062, + "num_input_tokens_seen": 2167808, + "step": 4415 + }, + { + "epoch": 0.5833443315296292, + "grad_norm": 50.75172424316406, + "learning_rate": 1.998315689981866e-06, + "loss": 0.225, + "num_input_tokens_seen": 2170112, + "step": 4420 + }, + { + "epoch": 0.5840042233073776, + "grad_norm": 0.11312510073184967, + "learning_rate": 1.998288856468077e-06, + "loss": 0.0063, + "num_input_tokens_seen": 2172480, + "step": 4425 + }, + { + "epoch": 0.5846641150851261, + "grad_norm": 41.83127975463867, + "learning_rate": 1.998261811074707e-06, + "loss": 0.1578, + "num_input_tokens_seen": 2175104, + "step": 4430 + }, + { + "epoch": 0.5853240068628744, + "grad_norm": 16.17295265197754, + "learning_rate": 1.998234553807497e-06, + "loss": 0.0695, + "num_input_tokens_seen": 2177280, + "step": 4435 + }, + { + "epoch": 0.585983898640623, + "grad_norm": 0.10577072203159332, + "learning_rate": 1.9982070846722312e-06, + "loss": 0.0882, + "num_input_tokens_seen": 2179776, + "step": 4440 + }, + { + "epoch": 0.5866437904183713, + "grad_norm": 1.0365639925003052, + "learning_rate": 1.9981794036747402e-06, + "loss": 0.1574, + "num_input_tokens_seen": 2182400, + "step": 4445 + }, + { + "epoch": 0.5873036821961198, + "grad_norm": 0.619874894618988, + "learning_rate": 1.998151510820899e-06, + "loss": 0.0313, + "num_input_tokens_seen": 2185088, + "step": 4450 + }, + { + "epoch": 0.5879635739738683, + "grad_norm": 1.185616135597229, + "learning_rate": 1.9981234061166275e-06, + "loss": 0.0876, + "num_input_tokens_seen": 2187776, + "step": 4455 + }, + { + "epoch": 0.5886234657516167, + "grad_norm": 225.3323211669922, + "learning_rate": 1.9980950895678914e-06, + "loss": 0.0183, + "num_input_tokens_seen": 2190016, + "step": 4460 + }, + { + "epoch": 0.5892833575293652, + "grad_norm": 91.41350555419922, + "learning_rate": 1.9980665611806998e-06, + "loss": 0.0676, + "num_input_tokens_seen": 2192320, + "step": 4465 + }, + { + "epoch": 0.5899432493071136, + "grad_norm": 31.638723373413086, + "learning_rate": 1.998037820961108e-06, + "loss": 0.1909, + "num_input_tokens_seen": 2194752, + "step": 4470 + }, + { + "epoch": 0.5906031410848621, + "grad_norm": 23.39573097229004, + "learning_rate": 1.9980088689152163e-06, + "loss": 0.1777, + "num_input_tokens_seen": 2197056, + "step": 4475 + }, + { + "epoch": 0.5912630328626105, + "grad_norm": 0.038047995418310165, + "learning_rate": 1.9979797050491687e-06, + "loss": 0.092, + "num_input_tokens_seen": 2199296, + "step": 4480 + }, + { + "epoch": 0.591922924640359, + "grad_norm": 47.337886810302734, + "learning_rate": 1.997950329369156e-06, + "loss": 0.3187, + "num_input_tokens_seen": 2201664, + "step": 4485 + }, + { + "epoch": 0.5925828164181074, + "grad_norm": 13.546995162963867, + "learning_rate": 1.997920741881412e-06, + "loss": 0.1067, + "num_input_tokens_seen": 2204288, + "step": 4490 + }, + { + "epoch": 0.5932427081958559, + "grad_norm": 15.969120979309082, + "learning_rate": 1.997890942592217e-06, + "loss": 0.1064, + "num_input_tokens_seen": 2206528, + "step": 4495 + }, + { + "epoch": 0.5939025999736043, + "grad_norm": 0.7014201283454895, + "learning_rate": 1.997860931507896e-06, + "loss": 0.1168, + "num_input_tokens_seen": 2209024, + "step": 4500 + }, + { + "epoch": 0.5945624917513528, + "grad_norm": 0.4199674129486084, + "learning_rate": 1.997830708634818e-06, + "loss": 0.0681, + "num_input_tokens_seen": 2211584, + "step": 4505 + }, + { + "epoch": 0.5952223835291012, + "grad_norm": 19.820323944091797, + "learning_rate": 1.9978002739793977e-06, + "loss": 0.1479, + "num_input_tokens_seen": 2213952, + "step": 4510 + }, + { + "epoch": 0.5958822753068497, + "grad_norm": 15.060903549194336, + "learning_rate": 1.9977696275480945e-06, + "loss": 0.1002, + "num_input_tokens_seen": 2216192, + "step": 4515 + }, + { + "epoch": 0.5965421670845982, + "grad_norm": 5.9868268966674805, + "learning_rate": 1.9977387693474134e-06, + "loss": 0.0057, + "num_input_tokens_seen": 2218688, + "step": 4520 + }, + { + "epoch": 0.5972020588623466, + "grad_norm": 0.29611310362815857, + "learning_rate": 1.9977076993839037e-06, + "loss": 0.0011, + "num_input_tokens_seen": 2220928, + "step": 4525 + }, + { + "epoch": 0.5978619506400951, + "grad_norm": 0.06432250887155533, + "learning_rate": 1.9976764176641592e-06, + "loss": 0.001, + "num_input_tokens_seen": 2223360, + "step": 4530 + }, + { + "epoch": 0.5985218424178435, + "grad_norm": 0.038721442222595215, + "learning_rate": 1.99764492419482e-06, + "loss": 0.1205, + "num_input_tokens_seen": 2225792, + "step": 4535 + }, + { + "epoch": 0.599181734195592, + "grad_norm": 65.97834014892578, + "learning_rate": 1.99761321898257e-06, + "loss": 0.2427, + "num_input_tokens_seen": 2228288, + "step": 4540 + }, + { + "epoch": 0.5998416259733403, + "grad_norm": 33.381248474121094, + "learning_rate": 1.9975813020341387e-06, + "loss": 0.2698, + "num_input_tokens_seen": 2230848, + "step": 4545 + }, + { + "epoch": 0.6005015177510888, + "grad_norm": 254.90545654296875, + "learning_rate": 1.9975491733562997e-06, + "loss": 0.2384, + "num_input_tokens_seen": 2233472, + "step": 4550 + }, + { + "epoch": 0.6011614095288372, + "grad_norm": 0.6209867000579834, + "learning_rate": 1.9975168329558725e-06, + "loss": 0.2191, + "num_input_tokens_seen": 2236096, + "step": 4555 + }, + { + "epoch": 0.6018213013065857, + "grad_norm": 0.45464786887168884, + "learning_rate": 1.9974842808397206e-06, + "loss": 0.1075, + "num_input_tokens_seen": 2238720, + "step": 4560 + }, + { + "epoch": 0.6024811930843341, + "grad_norm": 101.83075714111328, + "learning_rate": 1.9974515170147533e-06, + "loss": 0.1344, + "num_input_tokens_seen": 2241216, + "step": 4565 + }, + { + "epoch": 0.6031410848620826, + "grad_norm": 9.599241256713867, + "learning_rate": 1.997418541487925e-06, + "loss": 0.07, + "num_input_tokens_seen": 2243648, + "step": 4570 + }, + { + "epoch": 0.6038009766398311, + "grad_norm": 6.536127090454102, + "learning_rate": 1.9973853542662336e-06, + "loss": 0.1225, + "num_input_tokens_seen": 2246080, + "step": 4575 + }, + { + "epoch": 0.6044608684175795, + "grad_norm": 0.32332703471183777, + "learning_rate": 1.9973519553567233e-06, + "loss": 0.0581, + "num_input_tokens_seen": 2248256, + "step": 4580 + }, + { + "epoch": 0.605120760195328, + "grad_norm": 0.3811197280883789, + "learning_rate": 1.9973183447664826e-06, + "loss": 0.0503, + "num_input_tokens_seen": 2250688, + "step": 4585 + }, + { + "epoch": 0.6057806519730764, + "grad_norm": 15.983846664428711, + "learning_rate": 1.9972845225026458e-06, + "loss": 0.2459, + "num_input_tokens_seen": 2253120, + "step": 4590 + }, + { + "epoch": 0.6064405437508249, + "grad_norm": 15.699974060058594, + "learning_rate": 1.99725048857239e-06, + "loss": 0.2109, + "num_input_tokens_seen": 2255360, + "step": 4595 + }, + { + "epoch": 0.6071004355285733, + "grad_norm": 0.4910595118999481, + "learning_rate": 1.99721624298294e-06, + "loss": 0.1823, + "num_input_tokens_seen": 2257728, + "step": 4600 + }, + { + "epoch": 0.6077603273063218, + "grad_norm": 0.5166888236999512, + "learning_rate": 1.997181785741564e-06, + "loss": 0.0686, + "num_input_tokens_seen": 2260224, + "step": 4605 + }, + { + "epoch": 0.6084202190840702, + "grad_norm": 0.6000028848648071, + "learning_rate": 1.9971471168555746e-06, + "loss": 0.0076, + "num_input_tokens_seen": 2262912, + "step": 4610 + }, + { + "epoch": 0.6090801108618187, + "grad_norm": 15.50414752960205, + "learning_rate": 1.9971122363323307e-06, + "loss": 0.2299, + "num_input_tokens_seen": 2265152, + "step": 4615 + }, + { + "epoch": 0.6097400026395671, + "grad_norm": 0.13759158551692963, + "learning_rate": 1.9970771441792347e-06, + "loss": 0.0823, + "num_input_tokens_seen": 2267968, + "step": 4620 + }, + { + "epoch": 0.6103998944173156, + "grad_norm": 0.09345412999391556, + "learning_rate": 1.997041840403735e-06, + "loss": 0.1992, + "num_input_tokens_seen": 2270336, + "step": 4625 + }, + { + "epoch": 0.611059786195064, + "grad_norm": 12.803251266479492, + "learning_rate": 1.997006325013325e-06, + "loss": 0.2115, + "num_input_tokens_seen": 2273024, + "step": 4630 + }, + { + "epoch": 0.6117196779728125, + "grad_norm": 0.13515806198120117, + "learning_rate": 1.9969705980155426e-06, + "loss": 0.0794, + "num_input_tokens_seen": 2275264, + "step": 4635 + }, + { + "epoch": 0.612379569750561, + "grad_norm": 20.334821701049805, + "learning_rate": 1.99693465941797e-06, + "loss": 0.0776, + "num_input_tokens_seen": 2277888, + "step": 4640 + }, + { + "epoch": 0.6130394615283093, + "grad_norm": 0.2252114862203598, + "learning_rate": 1.9968985092282354e-06, + "loss": 0.002, + "num_input_tokens_seen": 2280320, + "step": 4645 + }, + { + "epoch": 0.6136993533060578, + "grad_norm": 132.18417358398438, + "learning_rate": 1.996862147454011e-06, + "loss": 0.0637, + "num_input_tokens_seen": 2282560, + "step": 4650 + }, + { + "epoch": 0.6143592450838062, + "grad_norm": 0.08274441957473755, + "learning_rate": 1.9968255741030144e-06, + "loss": 0.121, + "num_input_tokens_seen": 2284864, + "step": 4655 + }, + { + "epoch": 0.6150191368615547, + "grad_norm": 0.13016772270202637, + "learning_rate": 1.9967887891830082e-06, + "loss": 0.1595, + "num_input_tokens_seen": 2287168, + "step": 4660 + }, + { + "epoch": 0.6156790286393031, + "grad_norm": 1.3486446142196655, + "learning_rate": 1.9967517927017995e-06, + "loss": 0.0561, + "num_input_tokens_seen": 2289600, + "step": 4665 + }, + { + "epoch": 0.6163389204170516, + "grad_norm": 142.54844665527344, + "learning_rate": 1.996714584667241e-06, + "loss": 0.1015, + "num_input_tokens_seen": 2292160, + "step": 4670 + }, + { + "epoch": 0.6169988121948, + "grad_norm": 0.17757734656333923, + "learning_rate": 1.9966771650872295e-06, + "loss": 0.0621, + "num_input_tokens_seen": 2294912, + "step": 4675 + }, + { + "epoch": 0.6176587039725485, + "grad_norm": 0.3973993957042694, + "learning_rate": 1.996639533969707e-06, + "loss": 0.0647, + "num_input_tokens_seen": 2297024, + "step": 4680 + }, + { + "epoch": 0.6183185957502969, + "grad_norm": 12.3432035446167, + "learning_rate": 1.9966016913226602e-06, + "loss": 0.2015, + "num_input_tokens_seen": 2299456, + "step": 4685 + }, + { + "epoch": 0.6189784875280454, + "grad_norm": 0.01895112358033657, + "learning_rate": 1.9965636371541217e-06, + "loss": 0.062, + "num_input_tokens_seen": 2301568, + "step": 4690 + }, + { + "epoch": 0.6196383793057938, + "grad_norm": 119.7965316772461, + "learning_rate": 1.9965253714721676e-06, + "loss": 0.1759, + "num_input_tokens_seen": 2303936, + "step": 4695 + }, + { + "epoch": 0.6202982710835423, + "grad_norm": 12.308302879333496, + "learning_rate": 1.99648689428492e-06, + "loss": 0.1793, + "num_input_tokens_seen": 2306176, + "step": 4700 + }, + { + "epoch": 0.6209581628612908, + "grad_norm": 0.34794431924819946, + "learning_rate": 1.9964482056005446e-06, + "loss": 0.046, + "num_input_tokens_seen": 2308736, + "step": 4705 + }, + { + "epoch": 0.6216180546390392, + "grad_norm": 14.829720497131348, + "learning_rate": 1.9964093054272534e-06, + "loss": 0.2129, + "num_input_tokens_seen": 2311104, + "step": 4710 + }, + { + "epoch": 0.6222779464167877, + "grad_norm": 123.25777435302734, + "learning_rate": 1.9963701937733024e-06, + "loss": 0.1223, + "num_input_tokens_seen": 2313536, + "step": 4715 + }, + { + "epoch": 0.6229378381945361, + "grad_norm": 43.15277862548828, + "learning_rate": 1.9963308706469932e-06, + "loss": 0.1751, + "num_input_tokens_seen": 2316032, + "step": 4720 + }, + { + "epoch": 0.6235977299722846, + "grad_norm": 11.635313987731934, + "learning_rate": 1.9962913360566713e-06, + "loss": 0.2008, + "num_input_tokens_seen": 2318656, + "step": 4725 + }, + { + "epoch": 0.624257621750033, + "grad_norm": 14.797162055969238, + "learning_rate": 1.9962515900107283e-06, + "loss": 0.1295, + "num_input_tokens_seen": 2321216, + "step": 4730 + }, + { + "epoch": 0.6249175135277815, + "grad_norm": 1.9820233583450317, + "learning_rate": 1.9962116325175993e-06, + "loss": 0.1381, + "num_input_tokens_seen": 2323648, + "step": 4735 + }, + { + "epoch": 0.6255774053055299, + "grad_norm": 61.69966125488281, + "learning_rate": 1.996171463585765e-06, + "loss": 0.1807, + "num_input_tokens_seen": 2326080, + "step": 4740 + }, + { + "epoch": 0.6262372970832784, + "grad_norm": 14.098520278930664, + "learning_rate": 1.996131083223752e-06, + "loss": 0.097, + "num_input_tokens_seen": 2328512, + "step": 4745 + }, + { + "epoch": 0.6268971888610267, + "grad_norm": 2.896374464035034, + "learning_rate": 1.9960904914401295e-06, + "loss": 0.1358, + "num_input_tokens_seen": 2331008, + "step": 4750 + }, + { + "epoch": 0.6275570806387752, + "grad_norm": 67.75291442871094, + "learning_rate": 1.9960496882435138e-06, + "loss": 0.0575, + "num_input_tokens_seen": 2333376, + "step": 4755 + }, + { + "epoch": 0.6282169724165237, + "grad_norm": 37.26298141479492, + "learning_rate": 1.996008673642564e-06, + "loss": 0.2401, + "num_input_tokens_seen": 2335872, + "step": 4760 + }, + { + "epoch": 0.6288768641942721, + "grad_norm": 0.15088030695915222, + "learning_rate": 1.995967447645986e-06, + "loss": 0.0035, + "num_input_tokens_seen": 2338432, + "step": 4765 + }, + { + "epoch": 0.6295367559720206, + "grad_norm": 181.27249145507812, + "learning_rate": 1.9959260102625293e-06, + "loss": 0.2603, + "num_input_tokens_seen": 2340928, + "step": 4770 + }, + { + "epoch": 0.630196647749769, + "grad_norm": 0.40237560868263245, + "learning_rate": 1.9958843615009892e-06, + "loss": 0.1541, + "num_input_tokens_seen": 2343680, + "step": 4775 + }, + { + "epoch": 0.6308565395275175, + "grad_norm": 0.08449136465787888, + "learning_rate": 1.995842501370205e-06, + "loss": 0.062, + "num_input_tokens_seen": 2346240, + "step": 4780 + }, + { + "epoch": 0.6315164313052659, + "grad_norm": 0.08802822232246399, + "learning_rate": 1.9958004298790607e-06, + "loss": 0.1132, + "num_input_tokens_seen": 2348544, + "step": 4785 + }, + { + "epoch": 0.6321763230830144, + "grad_norm": 0.11865068972110748, + "learning_rate": 1.9957581470364867e-06, + "loss": 0.3771, + "num_input_tokens_seen": 2350976, + "step": 4790 + }, + { + "epoch": 0.6328362148607628, + "grad_norm": 40.807823181152344, + "learning_rate": 1.9957156528514564e-06, + "loss": 0.1463, + "num_input_tokens_seen": 2353216, + "step": 4795 + }, + { + "epoch": 0.6334961066385113, + "grad_norm": 0.09082300215959549, + "learning_rate": 1.995672947332989e-06, + "loss": 0.1893, + "num_input_tokens_seen": 2355584, + "step": 4800 + }, + { + "epoch": 0.6341559984162597, + "grad_norm": 0.1805894523859024, + "learning_rate": 1.995630030490149e-06, + "loss": 0.0834, + "num_input_tokens_seen": 2358144, + "step": 4805 + }, + { + "epoch": 0.6348158901940082, + "grad_norm": 47.16820526123047, + "learning_rate": 1.9955869023320447e-06, + "loss": 0.0498, + "num_input_tokens_seen": 2360896, + "step": 4810 + }, + { + "epoch": 0.6354757819717566, + "grad_norm": 0.7773832082748413, + "learning_rate": 1.99554356286783e-06, + "loss": 0.2227, + "num_input_tokens_seen": 2363264, + "step": 4815 + }, + { + "epoch": 0.6361356737495051, + "grad_norm": 13.574986457824707, + "learning_rate": 1.9955000121067035e-06, + "loss": 0.0659, + "num_input_tokens_seen": 2365632, + "step": 4820 + }, + { + "epoch": 0.6367955655272536, + "grad_norm": 0.08415788412094116, + "learning_rate": 1.9954562500579075e-06, + "loss": 0.0047, + "num_input_tokens_seen": 2368000, + "step": 4825 + }, + { + "epoch": 0.637455457305002, + "grad_norm": 119.38396453857422, + "learning_rate": 1.9954122767307316e-06, + "loss": 0.122, + "num_input_tokens_seen": 2370560, + "step": 4830 + }, + { + "epoch": 0.6381153490827505, + "grad_norm": 0.06524139642715454, + "learning_rate": 1.995368092134508e-06, + "loss": 0.0527, + "num_input_tokens_seen": 2373120, + "step": 4835 + }, + { + "epoch": 0.6387752408604989, + "grad_norm": 1.1225311756134033, + "learning_rate": 1.9953236962786143e-06, + "loss": 0.004, + "num_input_tokens_seen": 2375872, + "step": 4840 + }, + { + "epoch": 0.6394351326382474, + "grad_norm": 0.058993130922317505, + "learning_rate": 1.995279089172474e-06, + "loss": 0.2211, + "num_input_tokens_seen": 2378432, + "step": 4845 + }, + { + "epoch": 0.6400950244159958, + "grad_norm": 30.01612091064453, + "learning_rate": 1.9952342708255543e-06, + "loss": 0.1565, + "num_input_tokens_seen": 2380800, + "step": 4850 + }, + { + "epoch": 0.6407549161937443, + "grad_norm": 0.034984275698661804, + "learning_rate": 1.9951892412473677e-06, + "loss": 0.1074, + "num_input_tokens_seen": 2383744, + "step": 4855 + }, + { + "epoch": 0.6414148079714926, + "grad_norm": 13.939770698547363, + "learning_rate": 1.9951440004474707e-06, + "loss": 0.124, + "num_input_tokens_seen": 2386112, + "step": 4860 + }, + { + "epoch": 0.6420746997492411, + "grad_norm": 62.33798599243164, + "learning_rate": 1.9950985484354664e-06, + "loss": 0.1265, + "num_input_tokens_seen": 2388736, + "step": 4865 + }, + { + "epoch": 0.6427345915269895, + "grad_norm": 0.21689827740192413, + "learning_rate": 1.9950528852210013e-06, + "loss": 0.0334, + "num_input_tokens_seen": 2391104, + "step": 4870 + }, + { + "epoch": 0.643394483304738, + "grad_norm": 24.510175704956055, + "learning_rate": 1.9950070108137663e-06, + "loss": 0.1468, + "num_input_tokens_seen": 2393728, + "step": 4875 + }, + { + "epoch": 0.6440543750824864, + "grad_norm": 0.9391570091247559, + "learning_rate": 1.9949609252234985e-06, + "loss": 0.1562, + "num_input_tokens_seen": 2396480, + "step": 4880 + }, + { + "epoch": 0.6447142668602349, + "grad_norm": 0.11992018669843674, + "learning_rate": 1.9949146284599794e-06, + "loss": 0.1169, + "num_input_tokens_seen": 2399104, + "step": 4885 + }, + { + "epoch": 0.6453741586379834, + "grad_norm": 54.8850212097168, + "learning_rate": 1.9948681205330354e-06, + "loss": 0.096, + "num_input_tokens_seen": 2401664, + "step": 4890 + }, + { + "epoch": 0.6460340504157318, + "grad_norm": 27.9152889251709, + "learning_rate": 1.994821401452537e-06, + "loss": 0.1063, + "num_input_tokens_seen": 2404160, + "step": 4895 + }, + { + "epoch": 0.6466939421934803, + "grad_norm": 0.24877403676509857, + "learning_rate": 1.9947744712283997e-06, + "loss": 0.0837, + "num_input_tokens_seen": 2406592, + "step": 4900 + }, + { + "epoch": 0.6473538339712287, + "grad_norm": 0.04175091162323952, + "learning_rate": 1.9947273298705848e-06, + "loss": 0.0537, + "num_input_tokens_seen": 2409088, + "step": 4905 + }, + { + "epoch": 0.6480137257489772, + "grad_norm": 0.4560162425041199, + "learning_rate": 1.994679977389097e-06, + "loss": 0.173, + "num_input_tokens_seen": 2411584, + "step": 4910 + }, + { + "epoch": 0.6486736175267256, + "grad_norm": 0.3465970754623413, + "learning_rate": 1.9946324137939876e-06, + "loss": 0.3713, + "num_input_tokens_seen": 2414400, + "step": 4915 + }, + { + "epoch": 0.6493335093044741, + "grad_norm": 35.83467102050781, + "learning_rate": 1.9945846390953503e-06, + "loss": 0.22, + "num_input_tokens_seen": 2416640, + "step": 4920 + }, + { + "epoch": 0.6499934010822225, + "grad_norm": 18.50018310546875, + "learning_rate": 1.994536653303326e-06, + "loss": 0.1468, + "num_input_tokens_seen": 2419136, + "step": 4925 + }, + { + "epoch": 0.650653292859971, + "grad_norm": 0.6801294088363647, + "learning_rate": 1.9944884564280987e-06, + "loss": 0.0354, + "num_input_tokens_seen": 2421440, + "step": 4930 + }, + { + "epoch": 0.6513131846377194, + "grad_norm": 0.1738162487745285, + "learning_rate": 1.994440048479898e-06, + "loss": 0.0854, + "num_input_tokens_seen": 2424000, + "step": 4935 + }, + { + "epoch": 0.6519730764154679, + "grad_norm": 57.80926513671875, + "learning_rate": 1.9943914294689984e-06, + "loss": 0.0808, + "num_input_tokens_seen": 2426240, + "step": 4940 + }, + { + "epoch": 0.6526329681932164, + "grad_norm": 0.08340390771627426, + "learning_rate": 1.9943425994057184e-06, + "loss": 0.0641, + "num_input_tokens_seen": 2428864, + "step": 4945 + }, + { + "epoch": 0.6532928599709648, + "grad_norm": 0.4628424346446991, + "learning_rate": 1.994293558300422e-06, + "loss": 0.0188, + "num_input_tokens_seen": 2431296, + "step": 4950 + }, + { + "epoch": 0.6539527517487133, + "grad_norm": 0.0313633531332016, + "learning_rate": 1.9942443061635183e-06, + "loss": 0.0037, + "num_input_tokens_seen": 2433984, + "step": 4955 + }, + { + "epoch": 0.6546126435264616, + "grad_norm": 0.10239587724208832, + "learning_rate": 1.9941948430054603e-06, + "loss": 0.5298, + "num_input_tokens_seen": 2436224, + "step": 4960 + }, + { + "epoch": 0.6552725353042101, + "grad_norm": 0.33065155148506165, + "learning_rate": 1.994145168836746e-06, + "loss": 0.1602, + "num_input_tokens_seen": 2438720, + "step": 4965 + }, + { + "epoch": 0.6559324270819585, + "grad_norm": 0.1416037231683731, + "learning_rate": 1.994095283667919e-06, + "loss": 0.1132, + "num_input_tokens_seen": 2440960, + "step": 4970 + }, + { + "epoch": 0.656592318859707, + "grad_norm": 26.69266128540039, + "learning_rate": 1.9940451875095666e-06, + "loss": 0.0129, + "num_input_tokens_seen": 2443328, + "step": 4975 + }, + { + "epoch": 0.6572522106374554, + "grad_norm": 0.09884212166070938, + "learning_rate": 1.9939948803723217e-06, + "loss": 0.1357, + "num_input_tokens_seen": 2445952, + "step": 4980 + }, + { + "epoch": 0.6579121024152039, + "grad_norm": 0.06190844997763634, + "learning_rate": 1.9939443622668614e-06, + "loss": 0.0527, + "num_input_tokens_seen": 2448576, + "step": 4985 + }, + { + "epoch": 0.6585719941929523, + "grad_norm": 13.247187614440918, + "learning_rate": 1.9938936332039073e-06, + "loss": 0.3274, + "num_input_tokens_seen": 2451136, + "step": 4990 + }, + { + "epoch": 0.6592318859707008, + "grad_norm": 0.5872359871864319, + "learning_rate": 1.993842693194227e-06, + "loss": 0.0122, + "num_input_tokens_seen": 2453632, + "step": 4995 + }, + { + "epoch": 0.6598917777484492, + "grad_norm": 6.910613536834717, + "learning_rate": 1.993791542248632e-06, + "loss": 0.1135, + "num_input_tokens_seen": 2456192, + "step": 5000 + }, + { + "epoch": 0.6605516695261977, + "grad_norm": 0.10543259233236313, + "learning_rate": 1.9937401803779784e-06, + "loss": 0.0259, + "num_input_tokens_seen": 2458624, + "step": 5005 + }, + { + "epoch": 0.6612115613039462, + "grad_norm": 0.3452533185482025, + "learning_rate": 1.9936886075931678e-06, + "loss": 0.0829, + "num_input_tokens_seen": 2460928, + "step": 5010 + }, + { + "epoch": 0.6618714530816946, + "grad_norm": 3.1825904846191406, + "learning_rate": 1.993636823905146e-06, + "loss": 0.0726, + "num_input_tokens_seen": 2463552, + "step": 5015 + }, + { + "epoch": 0.6625313448594431, + "grad_norm": 75.93983459472656, + "learning_rate": 1.9935848293249034e-06, + "loss": 0.0881, + "num_input_tokens_seen": 2465856, + "step": 5020 + }, + { + "epoch": 0.6631912366371915, + "grad_norm": 0.03516853600740433, + "learning_rate": 1.9935326238634763e-06, + "loss": 0.044, + "num_input_tokens_seen": 2468288, + "step": 5025 + }, + { + "epoch": 0.66385112841494, + "grad_norm": 52.587249755859375, + "learning_rate": 1.993480207531944e-06, + "loss": 0.3045, + "num_input_tokens_seen": 2470912, + "step": 5030 + }, + { + "epoch": 0.6645110201926884, + "grad_norm": 0.8322742581367493, + "learning_rate": 1.9934275803414317e-06, + "loss": 0.1027, + "num_input_tokens_seen": 2473536, + "step": 5035 + }, + { + "epoch": 0.6651709119704369, + "grad_norm": 1.7283278703689575, + "learning_rate": 1.99337474230311e-06, + "loss": 0.0028, + "num_input_tokens_seen": 2476032, + "step": 5040 + }, + { + "epoch": 0.6658308037481853, + "grad_norm": 0.27299973368644714, + "learning_rate": 1.993321693428192e-06, + "loss": 0.2344, + "num_input_tokens_seen": 2478208, + "step": 5045 + }, + { + "epoch": 0.6664906955259338, + "grad_norm": 0.16945065557956696, + "learning_rate": 1.9932684337279378e-06, + "loss": 0.1178, + "num_input_tokens_seen": 2480512, + "step": 5050 + }, + { + "epoch": 0.6671505873036822, + "grad_norm": 0.08358097821474075, + "learning_rate": 1.9932149632136514e-06, + "loss": 0.2015, + "num_input_tokens_seen": 2483008, + "step": 5055 + }, + { + "epoch": 0.6678104790814307, + "grad_norm": 0.04497074335813522, + "learning_rate": 1.9931612818966812e-06, + "loss": 0.2345, + "num_input_tokens_seen": 2485376, + "step": 5060 + }, + { + "epoch": 0.668470370859179, + "grad_norm": 0.6962704658508301, + "learning_rate": 1.993107389788421e-06, + "loss": 0.0378, + "num_input_tokens_seen": 2488064, + "step": 5065 + }, + { + "epoch": 0.6691302626369275, + "grad_norm": 21.029521942138672, + "learning_rate": 1.9930532869003085e-06, + "loss": 0.0923, + "num_input_tokens_seen": 2490624, + "step": 5070 + }, + { + "epoch": 0.669790154414676, + "grad_norm": 24.72555923461914, + "learning_rate": 1.992998973243827e-06, + "loss": 0.4066, + "num_input_tokens_seen": 2493248, + "step": 5075 + }, + { + "epoch": 0.6704500461924244, + "grad_norm": 32.65169906616211, + "learning_rate": 1.9929444488305047e-06, + "loss": 0.1969, + "num_input_tokens_seen": 2495744, + "step": 5080 + }, + { + "epoch": 0.6711099379701729, + "grad_norm": 0.13829834759235382, + "learning_rate": 1.992889713671913e-06, + "loss": 0.0028, + "num_input_tokens_seen": 2498176, + "step": 5085 + }, + { + "epoch": 0.6717698297479213, + "grad_norm": 0.2096666544675827, + "learning_rate": 1.99283476777967e-06, + "loss": 0.0702, + "num_input_tokens_seen": 2500416, + "step": 5090 + }, + { + "epoch": 0.6724297215256698, + "grad_norm": 21.304018020629883, + "learning_rate": 1.9927796111654366e-06, + "loss": 0.1533, + "num_input_tokens_seen": 2502848, + "step": 5095 + }, + { + "epoch": 0.6730896133034182, + "grad_norm": 65.4073257446289, + "learning_rate": 1.99272424384092e-06, + "loss": 0.1575, + "num_input_tokens_seen": 2505152, + "step": 5100 + }, + { + "epoch": 0.6737495050811667, + "grad_norm": 51.36351776123047, + "learning_rate": 1.992668665817871e-06, + "loss": 0.1766, + "num_input_tokens_seen": 2507648, + "step": 5105 + }, + { + "epoch": 0.6744093968589151, + "grad_norm": 0.4049692153930664, + "learning_rate": 1.9926128771080867e-06, + "loss": 0.0742, + "num_input_tokens_seen": 2510144, + "step": 5110 + }, + { + "epoch": 0.6750692886366636, + "grad_norm": 0.48001331090927124, + "learning_rate": 1.9925568777234067e-06, + "loss": 0.2246, + "num_input_tokens_seen": 2513024, + "step": 5115 + }, + { + "epoch": 0.675729180414412, + "grad_norm": 0.321492999792099, + "learning_rate": 1.992500667675717e-06, + "loss": 0.0633, + "num_input_tokens_seen": 2515072, + "step": 5120 + }, + { + "epoch": 0.6763890721921605, + "grad_norm": 5.6695990562438965, + "learning_rate": 1.992444246976948e-06, + "loss": 0.0047, + "num_input_tokens_seen": 2517376, + "step": 5125 + }, + { + "epoch": 0.677048963969909, + "grad_norm": 0.6470169425010681, + "learning_rate": 1.9923876156390743e-06, + "loss": 0.0688, + "num_input_tokens_seen": 2520064, + "step": 5130 + }, + { + "epoch": 0.6777088557476574, + "grad_norm": 2.185170888900757, + "learning_rate": 1.992330773674115e-06, + "loss": 0.0592, + "num_input_tokens_seen": 2522688, + "step": 5135 + }, + { + "epoch": 0.6783687475254059, + "grad_norm": 0.08854498714208603, + "learning_rate": 1.9922737210941353e-06, + "loss": 0.0356, + "num_input_tokens_seen": 2525184, + "step": 5140 + }, + { + "epoch": 0.6790286393031543, + "grad_norm": 0.012564142234623432, + "learning_rate": 1.9922164579112436e-06, + "loss": 0.0004, + "num_input_tokens_seen": 2527552, + "step": 5145 + }, + { + "epoch": 0.6796885310809028, + "grad_norm": 68.29488372802734, + "learning_rate": 1.9921589841375938e-06, + "loss": 0.0067, + "num_input_tokens_seen": 2530240, + "step": 5150 + }, + { + "epoch": 0.6803484228586512, + "grad_norm": 199.47763061523438, + "learning_rate": 1.9921012997853843e-06, + "loss": 0.0581, + "num_input_tokens_seen": 2532480, + "step": 5155 + }, + { + "epoch": 0.6810083146363997, + "grad_norm": 0.01096346229314804, + "learning_rate": 1.9920434048668582e-06, + "loss": 0.0488, + "num_input_tokens_seen": 2534912, + "step": 5160 + }, + { + "epoch": 0.681668206414148, + "grad_norm": 0.222233846783638, + "learning_rate": 1.9919852993943035e-06, + "loss": 0.1032, + "num_input_tokens_seen": 2537408, + "step": 5165 + }, + { + "epoch": 0.6823280981918965, + "grad_norm": 0.04326486587524414, + "learning_rate": 1.991926983380052e-06, + "loss": 0.3101, + "num_input_tokens_seen": 2539776, + "step": 5170 + }, + { + "epoch": 0.6829879899696449, + "grad_norm": 0.02621627412736416, + "learning_rate": 1.9918684568364813e-06, + "loss": 0.0739, + "num_input_tokens_seen": 2542208, + "step": 5175 + }, + { + "epoch": 0.6836478817473934, + "grad_norm": 0.6199452877044678, + "learning_rate": 1.9918097197760134e-06, + "loss": 0.0663, + "num_input_tokens_seen": 2544704, + "step": 5180 + }, + { + "epoch": 0.6843077735251418, + "grad_norm": 0.917776882648468, + "learning_rate": 1.9917507722111144e-06, + "loss": 0.1005, + "num_input_tokens_seen": 2547072, + "step": 5185 + }, + { + "epoch": 0.6849676653028903, + "grad_norm": 0.5460265874862671, + "learning_rate": 1.9916916141542957e-06, + "loss": 0.1848, + "num_input_tokens_seen": 2549440, + "step": 5190 + }, + { + "epoch": 0.6856275570806388, + "grad_norm": 11.587053298950195, + "learning_rate": 1.991632245618113e-06, + "loss": 0.4134, + "num_input_tokens_seen": 2552128, + "step": 5195 + }, + { + "epoch": 0.6862874488583872, + "grad_norm": 0.13508452475070953, + "learning_rate": 1.9915726666151673e-06, + "loss": 0.0699, + "num_input_tokens_seen": 2554368, + "step": 5200 + }, + { + "epoch": 0.6869473406361357, + "grad_norm": 0.03892385959625244, + "learning_rate": 1.9915128771581033e-06, + "loss": 0.1567, + "num_input_tokens_seen": 2556928, + "step": 5205 + }, + { + "epoch": 0.6876072324138841, + "grad_norm": 12.882155418395996, + "learning_rate": 1.9914528772596113e-06, + "loss": 0.17, + "num_input_tokens_seen": 2559360, + "step": 5210 + }, + { + "epoch": 0.6882671241916326, + "grad_norm": 0.14622516930103302, + "learning_rate": 1.9913926669324253e-06, + "loss": 0.2589, + "num_input_tokens_seen": 2561856, + "step": 5215 + }, + { + "epoch": 0.688927015969381, + "grad_norm": 26.672409057617188, + "learning_rate": 1.991332246189325e-06, + "loss": 0.2197, + "num_input_tokens_seen": 2564352, + "step": 5220 + }, + { + "epoch": 0.6895869077471295, + "grad_norm": 0.45475801825523376, + "learning_rate": 1.9912716150431343e-06, + "loss": 0.0444, + "num_input_tokens_seen": 2566784, + "step": 5225 + }, + { + "epoch": 0.6902467995248779, + "grad_norm": 32.12901306152344, + "learning_rate": 1.9912107735067215e-06, + "loss": 0.1357, + "num_input_tokens_seen": 2569152, + "step": 5230 + }, + { + "epoch": 0.6909066913026264, + "grad_norm": 0.20969435572624207, + "learning_rate": 1.991149721593e-06, + "loss": 0.0881, + "num_input_tokens_seen": 2571520, + "step": 5235 + }, + { + "epoch": 0.6915665830803748, + "grad_norm": 1.3807674646377563, + "learning_rate": 1.991088459314927e-06, + "loss": 0.0346, + "num_input_tokens_seen": 2574080, + "step": 5240 + }, + { + "epoch": 0.6922264748581233, + "grad_norm": 0.21307876706123352, + "learning_rate": 1.991026986685506e-06, + "loss": 0.0699, + "num_input_tokens_seen": 2576320, + "step": 5245 + }, + { + "epoch": 0.6928863666358717, + "grad_norm": 0.09592917561531067, + "learning_rate": 1.9909653037177826e-06, + "loss": 0.073, + "num_input_tokens_seen": 2578752, + "step": 5250 + }, + { + "epoch": 0.6935462584136202, + "grad_norm": 24.82294464111328, + "learning_rate": 1.9909034104248503e-06, + "loss": 0.0697, + "num_input_tokens_seen": 2581184, + "step": 5255 + }, + { + "epoch": 0.6942061501913687, + "grad_norm": 2.019487142562866, + "learning_rate": 1.9908413068198442e-06, + "loss": 0.1952, + "num_input_tokens_seen": 2583872, + "step": 5260 + }, + { + "epoch": 0.694866041969117, + "grad_norm": 0.2514612078666687, + "learning_rate": 1.990778992915946e-06, + "loss": 0.2296, + "num_input_tokens_seen": 2586304, + "step": 5265 + }, + { + "epoch": 0.6955259337468656, + "grad_norm": 0.08101149648427963, + "learning_rate": 1.990716468726381e-06, + "loss": 0.1202, + "num_input_tokens_seen": 2589056, + "step": 5270 + }, + { + "epoch": 0.6961858255246139, + "grad_norm": 11.269828796386719, + "learning_rate": 1.9906537342644203e-06, + "loss": 0.1517, + "num_input_tokens_seen": 2591488, + "step": 5275 + }, + { + "epoch": 0.6968457173023624, + "grad_norm": 1.0551068782806396, + "learning_rate": 1.990590789543378e-06, + "loss": 0.0183, + "num_input_tokens_seen": 2593792, + "step": 5280 + }, + { + "epoch": 0.6975056090801108, + "grad_norm": 0.9069265127182007, + "learning_rate": 1.9905276345766134e-06, + "loss": 0.2654, + "num_input_tokens_seen": 2595968, + "step": 5285 + }, + { + "epoch": 0.6981655008578593, + "grad_norm": 14.611425399780273, + "learning_rate": 1.990464269377532e-06, + "loss": 0.2556, + "num_input_tokens_seen": 2598144, + "step": 5290 + }, + { + "epoch": 0.6988253926356077, + "grad_norm": 0.7366898655891418, + "learning_rate": 1.9904006939595815e-06, + "loss": 0.0923, + "num_input_tokens_seen": 2600448, + "step": 5295 + }, + { + "epoch": 0.6994852844133562, + "grad_norm": 0.2763810157775879, + "learning_rate": 1.9903369083362554e-06, + "loss": 0.0051, + "num_input_tokens_seen": 2602944, + "step": 5300 + }, + { + "epoch": 0.7001451761911046, + "grad_norm": 82.81452178955078, + "learning_rate": 1.990272912521092e-06, + "loss": 0.1914, + "num_input_tokens_seen": 2605120, + "step": 5305 + }, + { + "epoch": 0.7008050679688531, + "grad_norm": 27.29536247253418, + "learning_rate": 1.990208706527674e-06, + "loss": 0.0594, + "num_input_tokens_seen": 2607296, + "step": 5310 + }, + { + "epoch": 0.7014649597466015, + "grad_norm": 0.053431663662195206, + "learning_rate": 1.9901442903696284e-06, + "loss": 0.0578, + "num_input_tokens_seen": 2609728, + "step": 5315 + }, + { + "epoch": 0.70212485152435, + "grad_norm": 14.237296104431152, + "learning_rate": 1.990079664060628e-06, + "loss": 0.2463, + "num_input_tokens_seen": 2612224, + "step": 5320 + }, + { + "epoch": 0.7027847433020985, + "grad_norm": 48.734046936035156, + "learning_rate": 1.9900148276143874e-06, + "loss": 0.0071, + "num_input_tokens_seen": 2614720, + "step": 5325 + }, + { + "epoch": 0.7034446350798469, + "grad_norm": 0.6822793483734131, + "learning_rate": 1.9899497810446694e-06, + "loss": 0.3149, + "num_input_tokens_seen": 2617344, + "step": 5330 + }, + { + "epoch": 0.7041045268575954, + "grad_norm": 17.468055725097656, + "learning_rate": 1.989884524365279e-06, + "loss": 0.1851, + "num_input_tokens_seen": 2619584, + "step": 5335 + }, + { + "epoch": 0.7047644186353438, + "grad_norm": 0.7687005400657654, + "learning_rate": 1.9898190575900664e-06, + "loss": 0.0551, + "num_input_tokens_seen": 2621888, + "step": 5340 + }, + { + "epoch": 0.7054243104130923, + "grad_norm": 0.17701426148414612, + "learning_rate": 1.9897533807329265e-06, + "loss": 0.1479, + "num_input_tokens_seen": 2624512, + "step": 5345 + }, + { + "epoch": 0.7060842021908407, + "grad_norm": 0.30073249340057373, + "learning_rate": 1.989687493807799e-06, + "loss": 0.0035, + "num_input_tokens_seen": 2627008, + "step": 5350 + }, + { + "epoch": 0.7067440939685892, + "grad_norm": 0.2257729321718216, + "learning_rate": 1.9896213968286672e-06, + "loss": 0.0406, + "num_input_tokens_seen": 2629440, + "step": 5355 + }, + { + "epoch": 0.7074039857463376, + "grad_norm": 0.09596077352762222, + "learning_rate": 1.9895550898095606e-06, + "loss": 0.1103, + "num_input_tokens_seen": 2631872, + "step": 5360 + }, + { + "epoch": 0.7080638775240861, + "grad_norm": 0.2598581910133362, + "learning_rate": 1.9894885727645516e-06, + "loss": 0.1771, + "num_input_tokens_seen": 2634560, + "step": 5365 + }, + { + "epoch": 0.7087237693018344, + "grad_norm": 16.17540168762207, + "learning_rate": 1.989421845707759e-06, + "loss": 0.2135, + "num_input_tokens_seen": 2637120, + "step": 5370 + }, + { + "epoch": 0.709383661079583, + "grad_norm": 0.3493006229400635, + "learning_rate": 1.989354908653344e-06, + "loss": 0.0021, + "num_input_tokens_seen": 2639552, + "step": 5375 + }, + { + "epoch": 0.7100435528573315, + "grad_norm": 92.4169692993164, + "learning_rate": 1.989287761615514e-06, + "loss": 0.1498, + "num_input_tokens_seen": 2641984, + "step": 5380 + }, + { + "epoch": 0.7107034446350798, + "grad_norm": 12.568245887756348, + "learning_rate": 1.9892204046085206e-06, + "loss": 0.0816, + "num_input_tokens_seen": 2644352, + "step": 5385 + }, + { + "epoch": 0.7113633364128283, + "grad_norm": 0.05369238555431366, + "learning_rate": 1.98915283764666e-06, + "loss": 0.0261, + "num_input_tokens_seen": 2647040, + "step": 5390 + }, + { + "epoch": 0.7120232281905767, + "grad_norm": 0.1250723898410797, + "learning_rate": 1.989085060744272e-06, + "loss": 0.0705, + "num_input_tokens_seen": 2649472, + "step": 5395 + }, + { + "epoch": 0.7126831199683252, + "grad_norm": 1.4033637046813965, + "learning_rate": 1.989017073915742e-06, + "loss": 0.0431, + "num_input_tokens_seen": 2651840, + "step": 5400 + }, + { + "epoch": 0.7133430117460736, + "grad_norm": 61.13285446166992, + "learning_rate": 1.9889488771755004e-06, + "loss": 0.0093, + "num_input_tokens_seen": 2654464, + "step": 5405 + }, + { + "epoch": 0.7140029035238221, + "grad_norm": 0.004062811844050884, + "learning_rate": 1.9888804705380207e-06, + "loss": 0.1071, + "num_input_tokens_seen": 2656576, + "step": 5410 + }, + { + "epoch": 0.7146627953015705, + "grad_norm": 0.12063659727573395, + "learning_rate": 1.9888118540178228e-06, + "loss": 0.0828, + "num_input_tokens_seen": 2659008, + "step": 5415 + }, + { + "epoch": 0.715322687079319, + "grad_norm": 0.0075147757306694984, + "learning_rate": 1.9887430276294688e-06, + "loss": 0.0466, + "num_input_tokens_seen": 2661632, + "step": 5420 + }, + { + "epoch": 0.7159825788570674, + "grad_norm": 0.21719199419021606, + "learning_rate": 1.9886739913875666e-06, + "loss": 0.1611, + "num_input_tokens_seen": 2664192, + "step": 5425 + }, + { + "epoch": 0.7166424706348159, + "grad_norm": 130.8598175048828, + "learning_rate": 1.98860474530677e-06, + "loss": 0.0963, + "num_input_tokens_seen": 2666624, + "step": 5430 + }, + { + "epoch": 0.7173023624125643, + "grad_norm": 0.03334322199225426, + "learning_rate": 1.9885352894017745e-06, + "loss": 0.1402, + "num_input_tokens_seen": 2669120, + "step": 5435 + }, + { + "epoch": 0.7179622541903128, + "grad_norm": 4.289888381958008, + "learning_rate": 1.9884656236873224e-06, + "loss": 0.2358, + "num_input_tokens_seen": 2671552, + "step": 5440 + }, + { + "epoch": 0.7186221459680613, + "grad_norm": 13.685027122497559, + "learning_rate": 1.9883957481781998e-06, + "loss": 0.1333, + "num_input_tokens_seen": 2674240, + "step": 5445 + }, + { + "epoch": 0.7192820377458097, + "grad_norm": 0.42409083247184753, + "learning_rate": 1.988325662889237e-06, + "loss": 0.1131, + "num_input_tokens_seen": 2676544, + "step": 5450 + }, + { + "epoch": 0.7199419295235582, + "grad_norm": 0.199946328997612, + "learning_rate": 1.988255367835309e-06, + "loss": 0.0009, + "num_input_tokens_seen": 2678912, + "step": 5455 + }, + { + "epoch": 0.7206018213013066, + "grad_norm": 0.4616469442844391, + "learning_rate": 1.9881848630313357e-06, + "loss": 0.0309, + "num_input_tokens_seen": 2681344, + "step": 5460 + }, + { + "epoch": 0.7212617130790551, + "grad_norm": 130.4469757080078, + "learning_rate": 1.988114148492281e-06, + "loss": 0.0208, + "num_input_tokens_seen": 2683776, + "step": 5465 + }, + { + "epoch": 0.7219216048568035, + "grad_norm": 0.03411302715539932, + "learning_rate": 1.9880432242331534e-06, + "loss": 0.1115, + "num_input_tokens_seen": 2686016, + "step": 5470 + }, + { + "epoch": 0.722581496634552, + "grad_norm": 67.0157699584961, + "learning_rate": 1.9879720902690067e-06, + "loss": 0.1267, + "num_input_tokens_seen": 2688128, + "step": 5475 + }, + { + "epoch": 0.7232413884123003, + "grad_norm": 0.22230832278728485, + "learning_rate": 1.987900746614938e-06, + "loss": 0.2031, + "num_input_tokens_seen": 2690368, + "step": 5480 + }, + { + "epoch": 0.7239012801900488, + "grad_norm": 1.3174525499343872, + "learning_rate": 1.98782919328609e-06, + "loss": 0.0919, + "num_input_tokens_seen": 2692992, + "step": 5485 + }, + { + "epoch": 0.7245611719677972, + "grad_norm": 2.8719289302825928, + "learning_rate": 1.9877574302976484e-06, + "loss": 0.0433, + "num_input_tokens_seen": 2695424, + "step": 5490 + }, + { + "epoch": 0.7252210637455457, + "grad_norm": 12.99232292175293, + "learning_rate": 1.987685457664845e-06, + "loss": 0.1607, + "num_input_tokens_seen": 2697856, + "step": 5495 + }, + { + "epoch": 0.7258809555232941, + "grad_norm": 0.09089522063732147, + "learning_rate": 1.987613275402956e-06, + "loss": 0.0006, + "num_input_tokens_seen": 2700608, + "step": 5500 + }, + { + "epoch": 0.7265408473010426, + "grad_norm": 0.18377459049224854, + "learning_rate": 1.9875408835273007e-06, + "loss": 0.023, + "num_input_tokens_seen": 2703104, + "step": 5505 + }, + { + "epoch": 0.7272007390787911, + "grad_norm": 0.02759479358792305, + "learning_rate": 1.9874682820532444e-06, + "loss": 0.1917, + "num_input_tokens_seen": 2705344, + "step": 5510 + }, + { + "epoch": 0.7278606308565395, + "grad_norm": 1.4615646600723267, + "learning_rate": 1.9873954709961956e-06, + "loss": 0.0201, + "num_input_tokens_seen": 2707520, + "step": 5515 + }, + { + "epoch": 0.728520522634288, + "grad_norm": 13.904537200927734, + "learning_rate": 1.987322450371608e-06, + "loss": 0.1724, + "num_input_tokens_seen": 2709888, + "step": 5520 + }, + { + "epoch": 0.7291804144120364, + "grad_norm": 0.05545727536082268, + "learning_rate": 1.9872492201949807e-06, + "loss": 0.2705, + "num_input_tokens_seen": 2712192, + "step": 5525 + }, + { + "epoch": 0.7298403061897849, + "grad_norm": 0.2653372585773468, + "learning_rate": 1.9871757804818546e-06, + "loss": 0.0019, + "num_input_tokens_seen": 2714368, + "step": 5530 + }, + { + "epoch": 0.7305001979675333, + "grad_norm": 0.0897333025932312, + "learning_rate": 1.9871021312478183e-06, + "loss": 0.1082, + "num_input_tokens_seen": 2716608, + "step": 5535 + }, + { + "epoch": 0.7311600897452818, + "grad_norm": 0.06179777905344963, + "learning_rate": 1.9870282725085025e-06, + "loss": 0.0082, + "num_input_tokens_seen": 2718656, + "step": 5540 + }, + { + "epoch": 0.7318199815230302, + "grad_norm": 0.04015703871846199, + "learning_rate": 1.9869542042795832e-06, + "loss": 0.104, + "num_input_tokens_seen": 2721152, + "step": 5545 + }, + { + "epoch": 0.7324798733007787, + "grad_norm": 37.48463439941406, + "learning_rate": 1.9868799265767814e-06, + "loss": 0.0037, + "num_input_tokens_seen": 2723264, + "step": 5550 + }, + { + "epoch": 0.7331397650785271, + "grad_norm": 14.808982849121094, + "learning_rate": 1.986805439415861e-06, + "loss": 0.268, + "num_input_tokens_seen": 2725568, + "step": 5555 + }, + { + "epoch": 0.7337996568562756, + "grad_norm": 0.08154232054948807, + "learning_rate": 1.9867307428126327e-06, + "loss": 0.1503, + "num_input_tokens_seen": 2728192, + "step": 5560 + }, + { + "epoch": 0.7344595486340241, + "grad_norm": 18.187698364257812, + "learning_rate": 1.9866558367829493e-06, + "loss": 0.2448, + "num_input_tokens_seen": 2731072, + "step": 5565 + }, + { + "epoch": 0.7351194404117725, + "grad_norm": 0.0977545753121376, + "learning_rate": 1.986580721342709e-06, + "loss": 0.123, + "num_input_tokens_seen": 2733440, + "step": 5570 + }, + { + "epoch": 0.735779332189521, + "grad_norm": 10.494301795959473, + "learning_rate": 1.986505396507855e-06, + "loss": 0.1279, + "num_input_tokens_seen": 2736064, + "step": 5575 + }, + { + "epoch": 0.7364392239672694, + "grad_norm": 41.61827087402344, + "learning_rate": 1.9864298622943747e-06, + "loss": 0.0323, + "num_input_tokens_seen": 2738496, + "step": 5580 + }, + { + "epoch": 0.7370991157450179, + "grad_norm": 1.2770682573318481, + "learning_rate": 1.986354118718299e-06, + "loss": 0.0531, + "num_input_tokens_seen": 2740800, + "step": 5585 + }, + { + "epoch": 0.7377590075227662, + "grad_norm": 0.4450221359729767, + "learning_rate": 1.9862781657957043e-06, + "loss": 0.0734, + "num_input_tokens_seen": 2743104, + "step": 5590 + }, + { + "epoch": 0.7384188993005147, + "grad_norm": 3.060678482055664, + "learning_rate": 1.986202003542711e-06, + "loss": 0.164, + "num_input_tokens_seen": 2745344, + "step": 5595 + }, + { + "epoch": 0.7390787910782631, + "grad_norm": 38.93354034423828, + "learning_rate": 1.9861256319754836e-06, + "loss": 0.0798, + "num_input_tokens_seen": 2747520, + "step": 5600 + }, + { + "epoch": 0.7397386828560116, + "grad_norm": 0.26881110668182373, + "learning_rate": 1.986049051110232e-06, + "loss": 0.0556, + "num_input_tokens_seen": 2750016, + "step": 5605 + }, + { + "epoch": 0.74039857463376, + "grad_norm": 0.3444020748138428, + "learning_rate": 1.9859722609632097e-06, + "loss": 0.165, + "num_input_tokens_seen": 2752704, + "step": 5610 + }, + { + "epoch": 0.7410584664115085, + "grad_norm": 17.72441291809082, + "learning_rate": 1.985895261550715e-06, + "loss": 0.1732, + "num_input_tokens_seen": 2755328, + "step": 5615 + }, + { + "epoch": 0.7417183581892569, + "grad_norm": 17.03912925720215, + "learning_rate": 1.9858180528890898e-06, + "loss": 0.1728, + "num_input_tokens_seen": 2757632, + "step": 5620 + }, + { + "epoch": 0.7423782499670054, + "grad_norm": 0.28609731793403625, + "learning_rate": 1.985740634994722e-06, + "loss": 0.0655, + "num_input_tokens_seen": 2760192, + "step": 5625 + }, + { + "epoch": 0.7430381417447539, + "grad_norm": 0.18844915926456451, + "learning_rate": 1.985663007884043e-06, + "loss": 0.0018, + "num_input_tokens_seen": 2762816, + "step": 5630 + }, + { + "epoch": 0.7436980335225023, + "grad_norm": 13.197314262390137, + "learning_rate": 1.9855851715735275e-06, + "loss": 0.0711, + "num_input_tokens_seen": 2765120, + "step": 5635 + }, + { + "epoch": 0.7443579253002508, + "grad_norm": 0.40209174156188965, + "learning_rate": 1.985507126079697e-06, + "loss": 0.0933, + "num_input_tokens_seen": 2767808, + "step": 5640 + }, + { + "epoch": 0.7450178170779992, + "grad_norm": 0.0401005819439888, + "learning_rate": 1.985428871419115e-06, + "loss": 0.0009, + "num_input_tokens_seen": 2770176, + "step": 5645 + }, + { + "epoch": 0.7456777088557477, + "grad_norm": 32.41221237182617, + "learning_rate": 1.9853504076083914e-06, + "loss": 0.1552, + "num_input_tokens_seen": 2772672, + "step": 5650 + }, + { + "epoch": 0.7463376006334961, + "grad_norm": 0.06597806513309479, + "learning_rate": 1.985271734664179e-06, + "loss": 0.1258, + "num_input_tokens_seen": 2775104, + "step": 5655 + }, + { + "epoch": 0.7469974924112446, + "grad_norm": 85.4188232421875, + "learning_rate": 1.985192852603175e-06, + "loss": 0.3175, + "num_input_tokens_seen": 2777792, + "step": 5660 + }, + { + "epoch": 0.747657384188993, + "grad_norm": 36.49203109741211, + "learning_rate": 1.9851137614421234e-06, + "loss": 0.2089, + "num_input_tokens_seen": 2780416, + "step": 5665 + }, + { + "epoch": 0.7483172759667415, + "grad_norm": 0.09846457839012146, + "learning_rate": 1.9850344611978085e-06, + "loss": 0.0021, + "num_input_tokens_seen": 2783232, + "step": 5670 + }, + { + "epoch": 0.7489771677444899, + "grad_norm": 2.6694984436035156, + "learning_rate": 1.984954951887063e-06, + "loss": 0.1406, + "num_input_tokens_seen": 2785664, + "step": 5675 + }, + { + "epoch": 0.7496370595222384, + "grad_norm": 35.99433517456055, + "learning_rate": 1.984875233526761e-06, + "loss": 0.0632, + "num_input_tokens_seen": 2788224, + "step": 5680 + }, + { + "epoch": 0.7502969512999867, + "grad_norm": 0.10405652970075607, + "learning_rate": 1.984795306133823e-06, + "loss": 0.0028, + "num_input_tokens_seen": 2790656, + "step": 5685 + }, + { + "epoch": 0.7502969512999867, + "eval_loss": 0.09698151051998138, + "eval_runtime": 7.8183, + "eval_samples_per_second": 861.444, + "eval_steps_per_second": 107.696, + "num_input_tokens_seen": 2790656, + "step": 5685 + }, + { + "epoch": 0.7509568430777352, + "grad_norm": 0.023251961916685104, + "learning_rate": 1.984715169725212e-06, + "loss": 0.0287, + "num_input_tokens_seen": 2792960, + "step": 5690 + }, + { + "epoch": 0.7516167348554837, + "grad_norm": 124.132568359375, + "learning_rate": 1.9846348243179373e-06, + "loss": 0.0862, + "num_input_tokens_seen": 2795648, + "step": 5695 + }, + { + "epoch": 0.7522766266332321, + "grad_norm": 22.3035888671875, + "learning_rate": 1.9845542699290516e-06, + "loss": 0.0883, + "num_input_tokens_seen": 2797696, + "step": 5700 + }, + { + "epoch": 0.7529365184109806, + "grad_norm": 16.672033309936523, + "learning_rate": 1.9844735065756513e-06, + "loss": 0.1298, + "num_input_tokens_seen": 2800192, + "step": 5705 + }, + { + "epoch": 0.753596410188729, + "grad_norm": 0.1386905014514923, + "learning_rate": 1.984392534274878e-06, + "loss": 0.0658, + "num_input_tokens_seen": 2802560, + "step": 5710 + }, + { + "epoch": 0.7542563019664775, + "grad_norm": 0.1372869610786438, + "learning_rate": 1.9843113530439184e-06, + "loss": 0.2382, + "num_input_tokens_seen": 2804992, + "step": 5715 + }, + { + "epoch": 0.7549161937442259, + "grad_norm": 0.30530741810798645, + "learning_rate": 1.9842299629000014e-06, + "loss": 0.2144, + "num_input_tokens_seen": 2807296, + "step": 5720 + }, + { + "epoch": 0.7555760855219744, + "grad_norm": 55.86363220214844, + "learning_rate": 1.9841483638604025e-06, + "loss": 0.1445, + "num_input_tokens_seen": 2809984, + "step": 5725 + }, + { + "epoch": 0.7562359772997228, + "grad_norm": 0.07656501233577728, + "learning_rate": 1.9840665559424395e-06, + "loss": 0.0021, + "num_input_tokens_seen": 2812736, + "step": 5730 + }, + { + "epoch": 0.7568958690774713, + "grad_norm": 0.051343828439712524, + "learning_rate": 1.9839845391634764e-06, + "loss": 0.1602, + "num_input_tokens_seen": 2815040, + "step": 5735 + }, + { + "epoch": 0.7575557608552197, + "grad_norm": 0.09367392212152481, + "learning_rate": 1.9839023135409203e-06, + "loss": 0.1313, + "num_input_tokens_seen": 2817344, + "step": 5740 + }, + { + "epoch": 0.7582156526329682, + "grad_norm": 0.057923562824726105, + "learning_rate": 1.983819879092223e-06, + "loss": 0.0919, + "num_input_tokens_seen": 2819648, + "step": 5745 + }, + { + "epoch": 0.7588755444107167, + "grad_norm": 0.2078000158071518, + "learning_rate": 1.9837372358348804e-06, + "loss": 0.2254, + "num_input_tokens_seen": 2822464, + "step": 5750 + }, + { + "epoch": 0.7595354361884651, + "grad_norm": 6.0667009353637695, + "learning_rate": 1.9836543837864332e-06, + "loss": 0.1121, + "num_input_tokens_seen": 2824896, + "step": 5755 + }, + { + "epoch": 0.7601953279662136, + "grad_norm": 3.583667278289795, + "learning_rate": 1.9835713229644663e-06, + "loss": 0.1378, + "num_input_tokens_seen": 2827648, + "step": 5760 + }, + { + "epoch": 0.760855219743962, + "grad_norm": 13.19372272491455, + "learning_rate": 1.983488053386608e-06, + "loss": 0.1264, + "num_input_tokens_seen": 2830336, + "step": 5765 + }, + { + "epoch": 0.7615151115217105, + "grad_norm": 16.9455509185791, + "learning_rate": 1.983404575070533e-06, + "loss": 0.039, + "num_input_tokens_seen": 2832640, + "step": 5770 + }, + { + "epoch": 0.7621750032994589, + "grad_norm": 0.371663898229599, + "learning_rate": 1.9833208880339576e-06, + "loss": 0.0268, + "num_input_tokens_seen": 2834880, + "step": 5775 + }, + { + "epoch": 0.7628348950772074, + "grad_norm": 0.05054211616516113, + "learning_rate": 1.983236992294645e-06, + "loss": 0.1555, + "num_input_tokens_seen": 2837440, + "step": 5780 + }, + { + "epoch": 0.7634947868549558, + "grad_norm": 14.877882957458496, + "learning_rate": 1.9831528878704003e-06, + "loss": 0.1095, + "num_input_tokens_seen": 2839808, + "step": 5785 + }, + { + "epoch": 0.7641546786327043, + "grad_norm": 0.0845484808087349, + "learning_rate": 1.983068574779075e-06, + "loss": 0.1398, + "num_input_tokens_seen": 2842432, + "step": 5790 + }, + { + "epoch": 0.7648145704104526, + "grad_norm": 0.18073433637619019, + "learning_rate": 1.9829840530385633e-06, + "loss": 0.1598, + "num_input_tokens_seen": 2845120, + "step": 5795 + }, + { + "epoch": 0.7654744621882011, + "grad_norm": 170.3042755126953, + "learning_rate": 1.9828993226668046e-06, + "loss": 0.0721, + "num_input_tokens_seen": 2848000, + "step": 5800 + }, + { + "epoch": 0.7661343539659495, + "grad_norm": 43.15339660644531, + "learning_rate": 1.982814383681782e-06, + "loss": 0.1805, + "num_input_tokens_seen": 2850624, + "step": 5805 + }, + { + "epoch": 0.766794245743698, + "grad_norm": 0.265320360660553, + "learning_rate": 1.9827292361015235e-06, + "loss": 0.1815, + "num_input_tokens_seen": 2852992, + "step": 5810 + }, + { + "epoch": 0.7674541375214465, + "grad_norm": 140.9707489013672, + "learning_rate": 1.9826438799441016e-06, + "loss": 0.0437, + "num_input_tokens_seen": 2855424, + "step": 5815 + }, + { + "epoch": 0.7681140292991949, + "grad_norm": 0.3222537040710449, + "learning_rate": 1.982558315227631e-06, + "loss": 0.147, + "num_input_tokens_seen": 2857984, + "step": 5820 + }, + { + "epoch": 0.7687739210769434, + "grad_norm": 0.1431400626897812, + "learning_rate": 1.982472541970274e-06, + "loss": 0.0712, + "num_input_tokens_seen": 2860672, + "step": 5825 + }, + { + "epoch": 0.7694338128546918, + "grad_norm": 12.281307220458984, + "learning_rate": 1.9823865601902337e-06, + "loss": 0.21, + "num_input_tokens_seen": 2863040, + "step": 5830 + }, + { + "epoch": 0.7700937046324403, + "grad_norm": 40.90239715576172, + "learning_rate": 1.9823003699057607e-06, + "loss": 0.1239, + "num_input_tokens_seen": 2865856, + "step": 5835 + }, + { + "epoch": 0.7707535964101887, + "grad_norm": 0.3467998206615448, + "learning_rate": 1.9822139711351465e-06, + "loss": 0.1, + "num_input_tokens_seen": 2868096, + "step": 5840 + }, + { + "epoch": 0.7714134881879372, + "grad_norm": 0.14333048462867737, + "learning_rate": 1.9821273638967304e-06, + "loss": 0.0024, + "num_input_tokens_seen": 2870784, + "step": 5845 + }, + { + "epoch": 0.7720733799656856, + "grad_norm": 65.02461242675781, + "learning_rate": 1.9820405482088927e-06, + "loss": 0.0828, + "num_input_tokens_seen": 2873216, + "step": 5850 + }, + { + "epoch": 0.7727332717434341, + "grad_norm": 0.33633705973625183, + "learning_rate": 1.9819535240900606e-06, + "loss": 0.001, + "num_input_tokens_seen": 2875776, + "step": 5855 + }, + { + "epoch": 0.7733931635211825, + "grad_norm": 0.0250334981828928, + "learning_rate": 1.9818662915587036e-06, + "loss": 0.0624, + "num_input_tokens_seen": 2878336, + "step": 5860 + }, + { + "epoch": 0.774053055298931, + "grad_norm": 16.731359481811523, + "learning_rate": 1.981778850633336e-06, + "loss": 0.2229, + "num_input_tokens_seen": 2880896, + "step": 5865 + }, + { + "epoch": 0.7747129470766794, + "grad_norm": 0.014623081311583519, + "learning_rate": 1.981691201332517e-06, + "loss": 0.0652, + "num_input_tokens_seen": 2883648, + "step": 5870 + }, + { + "epoch": 0.7753728388544279, + "grad_norm": 0.05482471361756325, + "learning_rate": 1.9816033436748495e-06, + "loss": 0.0585, + "num_input_tokens_seen": 2885952, + "step": 5875 + }, + { + "epoch": 0.7760327306321764, + "grad_norm": 0.3382435739040375, + "learning_rate": 1.98151527767898e-06, + "loss": 0.079, + "num_input_tokens_seen": 2888576, + "step": 5880 + }, + { + "epoch": 0.7766926224099248, + "grad_norm": 0.5653005242347717, + "learning_rate": 1.981427003363601e-06, + "loss": 0.1387, + "num_input_tokens_seen": 2891136, + "step": 5885 + }, + { + "epoch": 0.7773525141876733, + "grad_norm": 39.504642486572266, + "learning_rate": 1.9813385207474472e-06, + "loss": 0.1429, + "num_input_tokens_seen": 2893696, + "step": 5890 + }, + { + "epoch": 0.7780124059654216, + "grad_norm": 1.0022053718566895, + "learning_rate": 1.981249829849299e-06, + "loss": 0.0546, + "num_input_tokens_seen": 2896512, + "step": 5895 + }, + { + "epoch": 0.7786722977431701, + "grad_norm": 28.317174911499023, + "learning_rate": 1.9811609306879798e-06, + "loss": 0.1847, + "num_input_tokens_seen": 2899008, + "step": 5900 + }, + { + "epoch": 0.7793321895209185, + "grad_norm": 0.7019644975662231, + "learning_rate": 1.9810718232823584e-06, + "loss": 0.0416, + "num_input_tokens_seen": 2901376, + "step": 5905 + }, + { + "epoch": 0.779992081298667, + "grad_norm": 13.494510650634766, + "learning_rate": 1.9809825076513462e-06, + "loss": 0.2391, + "num_input_tokens_seen": 2903872, + "step": 5910 + }, + { + "epoch": 0.7806519730764154, + "grad_norm": 0.11542707681655884, + "learning_rate": 1.980892983813901e-06, + "loss": 0.0021, + "num_input_tokens_seen": 2906240, + "step": 5915 + }, + { + "epoch": 0.7813118648541639, + "grad_norm": 44.78446578979492, + "learning_rate": 1.980803251789023e-06, + "loss": 0.1206, + "num_input_tokens_seen": 2908736, + "step": 5920 + }, + { + "epoch": 0.7819717566319123, + "grad_norm": 42.42718505859375, + "learning_rate": 1.980713311595757e-06, + "loss": 0.1592, + "num_input_tokens_seen": 2911104, + "step": 5925 + }, + { + "epoch": 0.7826316484096608, + "grad_norm": 0.17716114223003387, + "learning_rate": 1.980623163253192e-06, + "loss": 0.0998, + "num_input_tokens_seen": 2913472, + "step": 5930 + }, + { + "epoch": 0.7832915401874093, + "grad_norm": 67.53726196289062, + "learning_rate": 1.9805328067804626e-06, + "loss": 0.1875, + "num_input_tokens_seen": 2915840, + "step": 5935 + }, + { + "epoch": 0.7839514319651577, + "grad_norm": 0.056678541004657745, + "learning_rate": 1.980442242196745e-06, + "loss": 0.0014, + "num_input_tokens_seen": 2918144, + "step": 5940 + }, + { + "epoch": 0.7846113237429062, + "grad_norm": 0.0669153556227684, + "learning_rate": 1.9803514695212613e-06, + "loss": 0.1515, + "num_input_tokens_seen": 2920768, + "step": 5945 + }, + { + "epoch": 0.7852712155206546, + "grad_norm": 0.037776295095682144, + "learning_rate": 1.9802604887732773e-06, + "loss": 0.093, + "num_input_tokens_seen": 2923136, + "step": 5950 + }, + { + "epoch": 0.7859311072984031, + "grad_norm": 0.15245310962200165, + "learning_rate": 1.980169299972103e-06, + "loss": 0.1338, + "num_input_tokens_seen": 2925568, + "step": 5955 + }, + { + "epoch": 0.7865909990761515, + "grad_norm": 0.14270982146263123, + "learning_rate": 1.980077903137093e-06, + "loss": 0.0132, + "num_input_tokens_seen": 2928064, + "step": 5960 + }, + { + "epoch": 0.7872508908539, + "grad_norm": 13.677779197692871, + "learning_rate": 1.979986298287645e-06, + "loss": 0.1477, + "num_input_tokens_seen": 2930368, + "step": 5965 + }, + { + "epoch": 0.7879107826316484, + "grad_norm": 15.470702171325684, + "learning_rate": 1.979894485443201e-06, + "loss": 0.0939, + "num_input_tokens_seen": 2932928, + "step": 5970 + }, + { + "epoch": 0.7885706744093969, + "grad_norm": 191.1402130126953, + "learning_rate": 1.9798024646232495e-06, + "loss": 0.2729, + "num_input_tokens_seen": 2935360, + "step": 5975 + }, + { + "epoch": 0.7892305661871453, + "grad_norm": 148.3045196533203, + "learning_rate": 1.9797102358473195e-06, + "loss": 0.0693, + "num_input_tokens_seen": 2937920, + "step": 5980 + }, + { + "epoch": 0.7898904579648938, + "grad_norm": 18.475128173828125, + "learning_rate": 1.979617799134986e-06, + "loss": 0.2579, + "num_input_tokens_seen": 2940224, + "step": 5985 + }, + { + "epoch": 0.7905503497426422, + "grad_norm": 57.13227462768555, + "learning_rate": 1.979525154505869e-06, + "loss": 0.0073, + "num_input_tokens_seen": 2942848, + "step": 5990 + }, + { + "epoch": 0.7912102415203907, + "grad_norm": 0.4409717917442322, + "learning_rate": 1.979432301979631e-06, + "loss": 0.1256, + "num_input_tokens_seen": 2945344, + "step": 5995 + }, + { + "epoch": 0.7918701332981392, + "grad_norm": 15.219269752502441, + "learning_rate": 1.9793392415759796e-06, + "loss": 0.0084, + "num_input_tokens_seen": 2947840, + "step": 6000 + }, + { + "epoch": 0.7925300250758875, + "grad_norm": 0.10808387398719788, + "learning_rate": 1.979245973314666e-06, + "loss": 0.1543, + "num_input_tokens_seen": 2950144, + "step": 6005 + }, + { + "epoch": 0.793189916853636, + "grad_norm": 306.9264221191406, + "learning_rate": 1.9791524972154856e-06, + "loss": 0.0351, + "num_input_tokens_seen": 2952384, + "step": 6010 + }, + { + "epoch": 0.7938498086313844, + "grad_norm": 0.06602656096220016, + "learning_rate": 1.979058813298278e-06, + "loss": 0.2565, + "num_input_tokens_seen": 2955136, + "step": 6015 + }, + { + "epoch": 0.7945097004091329, + "grad_norm": 1.304447889328003, + "learning_rate": 1.978964921582927e-06, + "loss": 0.0011, + "num_input_tokens_seen": 2957824, + "step": 6020 + }, + { + "epoch": 0.7951695921868813, + "grad_norm": 0.026650432497262955, + "learning_rate": 1.9788708220893608e-06, + "loss": 0.063, + "num_input_tokens_seen": 2960256, + "step": 6025 + }, + { + "epoch": 0.7958294839646298, + "grad_norm": 0.8960063457489014, + "learning_rate": 1.9787765148375506e-06, + "loss": 0.19, + "num_input_tokens_seen": 2962944, + "step": 6030 + }, + { + "epoch": 0.7964893757423782, + "grad_norm": 126.94390869140625, + "learning_rate": 1.978681999847513e-06, + "loss": 0.2955, + "num_input_tokens_seen": 2965504, + "step": 6035 + }, + { + "epoch": 0.7971492675201267, + "grad_norm": 0.08152367174625397, + "learning_rate": 1.9785872771393084e-06, + "loss": 0.3805, + "num_input_tokens_seen": 2967744, + "step": 6040 + }, + { + "epoch": 0.7978091592978751, + "grad_norm": 34.75372314453125, + "learning_rate": 1.9784923467330403e-06, + "loss": 0.0549, + "num_input_tokens_seen": 2970240, + "step": 6045 + }, + { + "epoch": 0.7984690510756236, + "grad_norm": 0.5853905081748962, + "learning_rate": 1.9783972086488573e-06, + "loss": 0.2836, + "num_input_tokens_seen": 2972928, + "step": 6050 + }, + { + "epoch": 0.799128942853372, + "grad_norm": 0.08218041062355042, + "learning_rate": 1.9783018629069516e-06, + "loss": 0.0879, + "num_input_tokens_seen": 2975168, + "step": 6055 + }, + { + "epoch": 0.7997888346311205, + "grad_norm": 1.292216420173645, + "learning_rate": 1.97820630952756e-06, + "loss": 0.1121, + "num_input_tokens_seen": 2977408, + "step": 6060 + }, + { + "epoch": 0.800448726408869, + "grad_norm": 0.052405282855033875, + "learning_rate": 1.978110548530963e-06, + "loss": 0.0025, + "num_input_tokens_seen": 2979968, + "step": 6065 + }, + { + "epoch": 0.8011086181866174, + "grad_norm": 39.24075698852539, + "learning_rate": 1.9780145799374846e-06, + "loss": 0.2776, + "num_input_tokens_seen": 2982528, + "step": 6070 + }, + { + "epoch": 0.8017685099643659, + "grad_norm": 0.5942057967185974, + "learning_rate": 1.977918403767494e-06, + "loss": 0.0743, + "num_input_tokens_seen": 2984832, + "step": 6075 + }, + { + "epoch": 0.8024284017421143, + "grad_norm": 24.79922866821289, + "learning_rate": 1.9778220200414036e-06, + "loss": 0.0383, + "num_input_tokens_seen": 2987328, + "step": 6080 + }, + { + "epoch": 0.8030882935198628, + "grad_norm": 0.13059140741825104, + "learning_rate": 1.9777254287796706e-06, + "loss": 0.0037, + "num_input_tokens_seen": 2989760, + "step": 6085 + }, + { + "epoch": 0.8037481852976112, + "grad_norm": 0.6647312641143799, + "learning_rate": 1.9776286300027954e-06, + "loss": 0.0012, + "num_input_tokens_seen": 2992320, + "step": 6090 + }, + { + "epoch": 0.8044080770753597, + "grad_norm": 0.0258713997900486, + "learning_rate": 1.9775316237313225e-06, + "loss": 0.1335, + "num_input_tokens_seen": 2995136, + "step": 6095 + }, + { + "epoch": 0.805067968853108, + "grad_norm": 0.06406942009925842, + "learning_rate": 1.977434409985842e-06, + "loss": 0.0003, + "num_input_tokens_seen": 2998016, + "step": 6100 + }, + { + "epoch": 0.8057278606308566, + "grad_norm": 0.02696152776479721, + "learning_rate": 1.977336988786985e-06, + "loss": 0.2727, + "num_input_tokens_seen": 3000832, + "step": 6105 + }, + { + "epoch": 0.8063877524086049, + "grad_norm": 14.614540100097656, + "learning_rate": 1.97723936015543e-06, + "loss": 0.2474, + "num_input_tokens_seen": 3003584, + "step": 6110 + }, + { + "epoch": 0.8070476441863534, + "grad_norm": 91.05704498291016, + "learning_rate": 1.9771415241118972e-06, + "loss": 0.0878, + "num_input_tokens_seen": 3006464, + "step": 6115 + }, + { + "epoch": 0.8077075359641019, + "grad_norm": 0.38378047943115234, + "learning_rate": 1.9770434806771525e-06, + "loss": 0.1026, + "num_input_tokens_seen": 3008896, + "step": 6120 + }, + { + "epoch": 0.8083674277418503, + "grad_norm": 4.753599643707275, + "learning_rate": 1.976945229872003e-06, + "loss": 0.0518, + "num_input_tokens_seen": 3011392, + "step": 6125 + }, + { + "epoch": 0.8090273195195988, + "grad_norm": 23.890127182006836, + "learning_rate": 1.976846771717304e-06, + "loss": 0.2063, + "num_input_tokens_seen": 3014016, + "step": 6130 + }, + { + "epoch": 0.8096872112973472, + "grad_norm": 0.4210752546787262, + "learning_rate": 1.9767481062339512e-06, + "loss": 0.1909, + "num_input_tokens_seen": 3016576, + "step": 6135 + }, + { + "epoch": 0.8103471030750957, + "grad_norm": 67.47174072265625, + "learning_rate": 1.976649233442886e-06, + "loss": 0.0153, + "num_input_tokens_seen": 3019008, + "step": 6140 + }, + { + "epoch": 0.8110069948528441, + "grad_norm": 1.1742627620697021, + "learning_rate": 1.976550153365093e-06, + "loss": 0.0678, + "num_input_tokens_seen": 3021504, + "step": 6145 + }, + { + "epoch": 0.8116668866305926, + "grad_norm": 2.150078773498535, + "learning_rate": 1.9764508660216018e-06, + "loss": 0.0594, + "num_input_tokens_seen": 3023552, + "step": 6150 + }, + { + "epoch": 0.812326778408341, + "grad_norm": 56.31321716308594, + "learning_rate": 1.976351371433485e-06, + "loss": 0.1778, + "num_input_tokens_seen": 3025856, + "step": 6155 + }, + { + "epoch": 0.8129866701860895, + "grad_norm": 44.1425895690918, + "learning_rate": 1.9762516696218598e-06, + "loss": 0.2057, + "num_input_tokens_seen": 3028096, + "step": 6160 + }, + { + "epoch": 0.8136465619638379, + "grad_norm": 25.15863800048828, + "learning_rate": 1.9761517606078873e-06, + "loss": 0.3517, + "num_input_tokens_seen": 3030528, + "step": 6165 + }, + { + "epoch": 0.8143064537415864, + "grad_norm": 36.221038818359375, + "learning_rate": 1.9760516444127722e-06, + "loss": 0.2465, + "num_input_tokens_seen": 3033088, + "step": 6170 + }, + { + "epoch": 0.8149663455193348, + "grad_norm": 0.5490625500679016, + "learning_rate": 1.975951321057764e-06, + "loss": 0.0653, + "num_input_tokens_seen": 3035200, + "step": 6175 + }, + { + "epoch": 0.8156262372970833, + "grad_norm": 0.20859502255916595, + "learning_rate": 1.975850790564155e-06, + "loss": 0.0755, + "num_input_tokens_seen": 3037696, + "step": 6180 + }, + { + "epoch": 0.8162861290748318, + "grad_norm": 11.634245872497559, + "learning_rate": 1.9757500529532817e-06, + "loss": 0.1064, + "num_input_tokens_seen": 3040128, + "step": 6185 + }, + { + "epoch": 0.8169460208525802, + "grad_norm": 0.895517110824585, + "learning_rate": 1.975649108246526e-06, + "loss": 0.1667, + "num_input_tokens_seen": 3042560, + "step": 6190 + }, + { + "epoch": 0.8176059126303287, + "grad_norm": 33.11262512207031, + "learning_rate": 1.9755479564653123e-06, + "loss": 0.2541, + "num_input_tokens_seen": 3044800, + "step": 6195 + }, + { + "epoch": 0.8182658044080771, + "grad_norm": 16.164127349853516, + "learning_rate": 1.975446597631109e-06, + "loss": 0.124, + "num_input_tokens_seen": 3047040, + "step": 6200 + }, + { + "epoch": 0.8189256961858256, + "grad_norm": 0.22055456042289734, + "learning_rate": 1.975345031765429e-06, + "loss": 0.064, + "num_input_tokens_seen": 3049600, + "step": 6205 + }, + { + "epoch": 0.819585587963574, + "grad_norm": 1.1324610710144043, + "learning_rate": 1.975243258889829e-06, + "loss": 0.1276, + "num_input_tokens_seen": 3052416, + "step": 6210 + }, + { + "epoch": 0.8202454797413224, + "grad_norm": 0.5565292835235596, + "learning_rate": 1.9751412790259093e-06, + "loss": 0.0928, + "num_input_tokens_seen": 3055040, + "step": 6215 + }, + { + "epoch": 0.8209053715190708, + "grad_norm": 32.02667999267578, + "learning_rate": 1.9750390921953144e-06, + "loss": 0.0983, + "num_input_tokens_seen": 3057856, + "step": 6220 + }, + { + "epoch": 0.8215652632968193, + "grad_norm": 1.3813210725784302, + "learning_rate": 1.9749366984197335e-06, + "loss": 0.2008, + "num_input_tokens_seen": 3060160, + "step": 6225 + }, + { + "epoch": 0.8222251550745677, + "grad_norm": 2.9544014930725098, + "learning_rate": 1.9748340977208975e-06, + "loss": 0.1972, + "num_input_tokens_seen": 3062592, + "step": 6230 + }, + { + "epoch": 0.8228850468523162, + "grad_norm": 11.683035850524902, + "learning_rate": 1.9747312901205837e-06, + "loss": 0.0591, + "num_input_tokens_seen": 3065088, + "step": 6235 + }, + { + "epoch": 0.8235449386300646, + "grad_norm": 0.14474692940711975, + "learning_rate": 1.9746282756406126e-06, + "loss": 0.0013, + "num_input_tokens_seen": 3067712, + "step": 6240 + }, + { + "epoch": 0.8242048304078131, + "grad_norm": 30.500009536743164, + "learning_rate": 1.974525054302847e-06, + "loss": 0.1508, + "num_input_tokens_seen": 3070144, + "step": 6245 + }, + { + "epoch": 0.8248647221855616, + "grad_norm": 0.25611788034439087, + "learning_rate": 1.974421626129196e-06, + "loss": 0.2101, + "num_input_tokens_seen": 3072448, + "step": 6250 + }, + { + "epoch": 0.82552461396331, + "grad_norm": 0.5064347982406616, + "learning_rate": 1.9743179911416104e-06, + "loss": 0.1979, + "num_input_tokens_seen": 3075072, + "step": 6255 + }, + { + "epoch": 0.8261845057410585, + "grad_norm": 0.08567559719085693, + "learning_rate": 1.9742141493620876e-06, + "loss": 0.1248, + "num_input_tokens_seen": 3077376, + "step": 6260 + }, + { + "epoch": 0.8268443975188069, + "grad_norm": 39.92914581298828, + "learning_rate": 1.9741101008126655e-06, + "loss": 0.2122, + "num_input_tokens_seen": 3079808, + "step": 6265 + }, + { + "epoch": 0.8275042892965554, + "grad_norm": 1.009863018989563, + "learning_rate": 1.974005845515429e-06, + "loss": 0.0026, + "num_input_tokens_seen": 3082560, + "step": 6270 + }, + { + "epoch": 0.8281641810743038, + "grad_norm": 26.14215087890625, + "learning_rate": 1.9739013834925047e-06, + "loss": 0.1156, + "num_input_tokens_seen": 3084608, + "step": 6275 + }, + { + "epoch": 0.8288240728520523, + "grad_norm": 18.71134376525879, + "learning_rate": 1.973796714766064e-06, + "loss": 0.3242, + "num_input_tokens_seen": 3087104, + "step": 6280 + }, + { + "epoch": 0.8294839646298007, + "grad_norm": 0.44969964027404785, + "learning_rate": 1.973691839358323e-06, + "loss": 0.0915, + "num_input_tokens_seen": 3089408, + "step": 6285 + }, + { + "epoch": 0.8301438564075492, + "grad_norm": 110.7673110961914, + "learning_rate": 1.973586757291539e-06, + "loss": 0.054, + "num_input_tokens_seen": 3091776, + "step": 6290 + }, + { + "epoch": 0.8308037481852976, + "grad_norm": 0.1554386168718338, + "learning_rate": 1.973481468588017e-06, + "loss": 0.1558, + "num_input_tokens_seen": 3094208, + "step": 6295 + }, + { + "epoch": 0.8314636399630461, + "grad_norm": 0.08468946814537048, + "learning_rate": 1.973375973270102e-06, + "loss": 0.0414, + "num_input_tokens_seen": 3096768, + "step": 6300 + }, + { + "epoch": 0.8321235317407946, + "grad_norm": 0.7825853824615479, + "learning_rate": 1.973270271360185e-06, + "loss": 0.0018, + "num_input_tokens_seen": 3099456, + "step": 6305 + }, + { + "epoch": 0.832783423518543, + "grad_norm": 0.31845423579216003, + "learning_rate": 1.9731643628807014e-06, + "loss": 0.1685, + "num_input_tokens_seen": 3102208, + "step": 6310 + }, + { + "epoch": 0.8334433152962915, + "grad_norm": 0.06811577826738358, + "learning_rate": 1.973058247854129e-06, + "loss": 0.0576, + "num_input_tokens_seen": 3104896, + "step": 6315 + }, + { + "epoch": 0.8341032070740398, + "grad_norm": 0.04747435823082924, + "learning_rate": 1.9729519263029895e-06, + "loss": 0.1591, + "num_input_tokens_seen": 3107520, + "step": 6320 + }, + { + "epoch": 0.8347630988517883, + "grad_norm": 0.05607318878173828, + "learning_rate": 1.972845398249849e-06, + "loss": 0.0462, + "num_input_tokens_seen": 3110144, + "step": 6325 + }, + { + "epoch": 0.8354229906295367, + "grad_norm": 0.04064086452126503, + "learning_rate": 1.972738663717318e-06, + "loss": 0.0463, + "num_input_tokens_seen": 3112768, + "step": 6330 + }, + { + "epoch": 0.8360828824072852, + "grad_norm": 0.023201411589980125, + "learning_rate": 1.9726317227280494e-06, + "loss": 0.0003, + "num_input_tokens_seen": 3115328, + "step": 6335 + }, + { + "epoch": 0.8367427741850336, + "grad_norm": 0.04389675334095955, + "learning_rate": 1.972524575304741e-06, + "loss": 0.0213, + "num_input_tokens_seen": 3117888, + "step": 6340 + }, + { + "epoch": 0.8374026659627821, + "grad_norm": 0.3593812584877014, + "learning_rate": 1.972417221470134e-06, + "loss": 0.0118, + "num_input_tokens_seen": 3120384, + "step": 6345 + }, + { + "epoch": 0.8380625577405305, + "grad_norm": 0.34914690256118774, + "learning_rate": 1.972309661247013e-06, + "loss": 0.1584, + "num_input_tokens_seen": 3123008, + "step": 6350 + }, + { + "epoch": 0.838722449518279, + "grad_norm": 0.03417549654841423, + "learning_rate": 1.9722018946582075e-06, + "loss": 0.0573, + "num_input_tokens_seen": 3125504, + "step": 6355 + }, + { + "epoch": 0.8393823412960274, + "grad_norm": 0.46451956033706665, + "learning_rate": 1.9720939217265904e-06, + "loss": 0.0681, + "num_input_tokens_seen": 3127744, + "step": 6360 + }, + { + "epoch": 0.8400422330737759, + "grad_norm": 0.0629674419760704, + "learning_rate": 1.9719857424750776e-06, + "loss": 0.1754, + "num_input_tokens_seen": 3130048, + "step": 6365 + }, + { + "epoch": 0.8407021248515244, + "grad_norm": 0.19810578227043152, + "learning_rate": 1.971877356926629e-06, + "loss": 0.0619, + "num_input_tokens_seen": 3132480, + "step": 6370 + }, + { + "epoch": 0.8413620166292728, + "grad_norm": 100.77786254882812, + "learning_rate": 1.9717687651042494e-06, + "loss": 0.136, + "num_input_tokens_seen": 3135104, + "step": 6375 + }, + { + "epoch": 0.8420219084070213, + "grad_norm": 48.25716781616211, + "learning_rate": 1.971659967030987e-06, + "loss": 0.2398, + "num_input_tokens_seen": 3137344, + "step": 6380 + }, + { + "epoch": 0.8426818001847697, + "grad_norm": 23.40747833251953, + "learning_rate": 1.9715509627299324e-06, + "loss": 0.2223, + "num_input_tokens_seen": 3140096, + "step": 6385 + }, + { + "epoch": 0.8433416919625182, + "grad_norm": 22.579530715942383, + "learning_rate": 1.971441752224221e-06, + "loss": 0.1451, + "num_input_tokens_seen": 3142400, + "step": 6390 + }, + { + "epoch": 0.8440015837402666, + "grad_norm": 1.4132654666900635, + "learning_rate": 1.971332335537033e-06, + "loss": 0.0571, + "num_input_tokens_seen": 3144512, + "step": 6395 + }, + { + "epoch": 0.8446614755180151, + "grad_norm": 16.25748634338379, + "learning_rate": 1.97122271269159e-06, + "loss": 0.1166, + "num_input_tokens_seen": 3146944, + "step": 6400 + }, + { + "epoch": 0.8453213672957635, + "grad_norm": 0.35819733142852783, + "learning_rate": 1.97111288371116e-06, + "loss": 0.1062, + "num_input_tokens_seen": 3149376, + "step": 6405 + }, + { + "epoch": 0.845981259073512, + "grad_norm": 0.4410839378833771, + "learning_rate": 1.9710028486190524e-06, + "loss": 0.1249, + "num_input_tokens_seen": 3151744, + "step": 6410 + }, + { + "epoch": 0.8466411508512603, + "grad_norm": 0.2627358138561249, + "learning_rate": 1.970892607438621e-06, + "loss": 0.039, + "num_input_tokens_seen": 3154112, + "step": 6415 + }, + { + "epoch": 0.8473010426290088, + "grad_norm": 0.37668725848197937, + "learning_rate": 1.970782160193265e-06, + "loss": 0.0129, + "num_input_tokens_seen": 3156480, + "step": 6420 + }, + { + "epoch": 0.8479609344067572, + "grad_norm": 17.34520721435547, + "learning_rate": 1.970671506906425e-06, + "loss": 0.2154, + "num_input_tokens_seen": 3158784, + "step": 6425 + }, + { + "epoch": 0.8486208261845057, + "grad_norm": 0.09452015161514282, + "learning_rate": 1.970560647601587e-06, + "loss": 0.1681, + "num_input_tokens_seen": 3161152, + "step": 6430 + }, + { + "epoch": 0.8492807179622542, + "grad_norm": 0.035893987864255905, + "learning_rate": 1.9704495823022797e-06, + "loss": 0.0015, + "num_input_tokens_seen": 3163776, + "step": 6435 + }, + { + "epoch": 0.8499406097400026, + "grad_norm": 35.052188873291016, + "learning_rate": 1.970338311032076e-06, + "loss": 0.1335, + "num_input_tokens_seen": 3166272, + "step": 6440 + }, + { + "epoch": 0.8506005015177511, + "grad_norm": 20.927108764648438, + "learning_rate": 1.970226833814592e-06, + "loss": 0.1466, + "num_input_tokens_seen": 3168640, + "step": 6445 + }, + { + "epoch": 0.8512603932954995, + "grad_norm": 0.10324529558420181, + "learning_rate": 1.970115150673489e-06, + "loss": 0.072, + "num_input_tokens_seen": 3171008, + "step": 6450 + }, + { + "epoch": 0.851920285073248, + "grad_norm": 0.2769117057323456, + "learning_rate": 1.97000326163247e-06, + "loss": 0.0785, + "num_input_tokens_seen": 3173312, + "step": 6455 + }, + { + "epoch": 0.8525801768509964, + "grad_norm": 0.13265232741832733, + "learning_rate": 1.969891166715283e-06, + "loss": 0.1788, + "num_input_tokens_seen": 3175808, + "step": 6460 + }, + { + "epoch": 0.8532400686287449, + "grad_norm": 0.49003279209136963, + "learning_rate": 1.969778865945719e-06, + "loss": 0.1182, + "num_input_tokens_seen": 3178048, + "step": 6465 + }, + { + "epoch": 0.8538999604064933, + "grad_norm": 0.14793264865875244, + "learning_rate": 1.969666359347614e-06, + "loss": 0.0031, + "num_input_tokens_seen": 3180544, + "step": 6470 + }, + { + "epoch": 0.8545598521842418, + "grad_norm": 11.453808784484863, + "learning_rate": 1.969553646944845e-06, + "loss": 0.268, + "num_input_tokens_seen": 3183040, + "step": 6475 + }, + { + "epoch": 0.8552197439619902, + "grad_norm": 0.1205844134092331, + "learning_rate": 1.969440728761336e-06, + "loss": 0.0905, + "num_input_tokens_seen": 3185664, + "step": 6480 + }, + { + "epoch": 0.8558796357397387, + "grad_norm": 17.57941436767578, + "learning_rate": 1.9693276048210524e-06, + "loss": 0.1175, + "num_input_tokens_seen": 3188672, + "step": 6485 + }, + { + "epoch": 0.8565395275174872, + "grad_norm": 42.01498031616211, + "learning_rate": 1.969214275148004e-06, + "loss": 0.0078, + "num_input_tokens_seen": 3191168, + "step": 6490 + }, + { + "epoch": 0.8571994192952356, + "grad_norm": 84.84487915039062, + "learning_rate": 1.9691007397662444e-06, + "loss": 0.2481, + "num_input_tokens_seen": 3193664, + "step": 6495 + }, + { + "epoch": 0.8578593110729841, + "grad_norm": 0.0309496708214283, + "learning_rate": 1.96898699869987e-06, + "loss": 0.0524, + "num_input_tokens_seen": 3196224, + "step": 6500 + }, + { + "epoch": 0.8585192028507325, + "grad_norm": 0.08695626258850098, + "learning_rate": 1.968873051973022e-06, + "loss": 0.1735, + "num_input_tokens_seen": 3198784, + "step": 6505 + }, + { + "epoch": 0.859179094628481, + "grad_norm": 0.0463121235370636, + "learning_rate": 1.968758899609885e-06, + "loss": 0.1, + "num_input_tokens_seen": 3201472, + "step": 6510 + }, + { + "epoch": 0.8598389864062294, + "grad_norm": 0.3663501739501953, + "learning_rate": 1.9686445416346866e-06, + "loss": 0.0387, + "num_input_tokens_seen": 3203584, + "step": 6515 + }, + { + "epoch": 0.8604988781839779, + "grad_norm": 44.38224411010742, + "learning_rate": 1.9685299780716988e-06, + "loss": 0.16, + "num_input_tokens_seen": 3205888, + "step": 6520 + }, + { + "epoch": 0.8611587699617262, + "grad_norm": 0.530706524848938, + "learning_rate": 1.968415208945237e-06, + "loss": 0.2118, + "num_input_tokens_seen": 3208000, + "step": 6525 + }, + { + "epoch": 0.8618186617394747, + "grad_norm": 0.151189386844635, + "learning_rate": 1.9683002342796594e-06, + "loss": 0.0009, + "num_input_tokens_seen": 3210240, + "step": 6530 + }, + { + "epoch": 0.8624785535172231, + "grad_norm": 54.15868377685547, + "learning_rate": 1.9681850540993687e-06, + "loss": 0.0847, + "num_input_tokens_seen": 3212672, + "step": 6535 + }, + { + "epoch": 0.8631384452949716, + "grad_norm": 11.121855735778809, + "learning_rate": 1.9680696684288116e-06, + "loss": 0.2278, + "num_input_tokens_seen": 3215360, + "step": 6540 + }, + { + "epoch": 0.86379833707272, + "grad_norm": 1.1879770755767822, + "learning_rate": 1.9679540772924773e-06, + "loss": 0.1291, + "num_input_tokens_seen": 3218112, + "step": 6545 + }, + { + "epoch": 0.8644582288504685, + "grad_norm": 0.7121301293373108, + "learning_rate": 1.9678382807149e-06, + "loss": 0.0677, + "num_input_tokens_seen": 3220288, + "step": 6550 + }, + { + "epoch": 0.865118120628217, + "grad_norm": 0.16042585670948029, + "learning_rate": 1.967722278720656e-06, + "loss": 0.0061, + "num_input_tokens_seen": 3222976, + "step": 6555 + }, + { + "epoch": 0.8657780124059654, + "grad_norm": 4.094472408294678, + "learning_rate": 1.967606071334366e-06, + "loss": 0.087, + "num_input_tokens_seen": 3225472, + "step": 6560 + }, + { + "epoch": 0.8664379041837139, + "grad_norm": 2.223787307739258, + "learning_rate": 1.9674896585806938e-06, + "loss": 0.2098, + "num_input_tokens_seen": 3228096, + "step": 6565 + }, + { + "epoch": 0.8670977959614623, + "grad_norm": 0.06424208730459213, + "learning_rate": 1.967373040484348e-06, + "loss": 0.1914, + "num_input_tokens_seen": 3230720, + "step": 6570 + }, + { + "epoch": 0.8677576877392108, + "grad_norm": 13.304986953735352, + "learning_rate": 1.9672562170700794e-06, + "loss": 0.1312, + "num_input_tokens_seen": 3233088, + "step": 6575 + }, + { + "epoch": 0.8684175795169592, + "grad_norm": 25.26539421081543, + "learning_rate": 1.967139188362683e-06, + "loss": 0.139, + "num_input_tokens_seen": 3235712, + "step": 6580 + }, + { + "epoch": 0.8690774712947077, + "grad_norm": 0.19319690763950348, + "learning_rate": 1.9670219543869977e-06, + "loss": 0.1531, + "num_input_tokens_seen": 3238528, + "step": 6585 + }, + { + "epoch": 0.8697373630724561, + "grad_norm": 18.5180606842041, + "learning_rate": 1.9669045151679045e-06, + "loss": 0.1389, + "num_input_tokens_seen": 3240896, + "step": 6590 + }, + { + "epoch": 0.8703972548502046, + "grad_norm": 0.2979770004749298, + "learning_rate": 1.9667868707303304e-06, + "loss": 0.0033, + "num_input_tokens_seen": 3243392, + "step": 6595 + }, + { + "epoch": 0.871057146627953, + "grad_norm": 0.1635628491640091, + "learning_rate": 1.966669021099244e-06, + "loss": 0.0405, + "num_input_tokens_seen": 3245824, + "step": 6600 + }, + { + "epoch": 0.8717170384057015, + "grad_norm": 2.8577051162719727, + "learning_rate": 1.966550966299657e-06, + "loss": 0.002, + "num_input_tokens_seen": 3248128, + "step": 6605 + }, + { + "epoch": 0.8723769301834499, + "grad_norm": 52.59244155883789, + "learning_rate": 1.9664327063566273e-06, + "loss": 0.2562, + "num_input_tokens_seen": 3250624, + "step": 6610 + }, + { + "epoch": 0.8730368219611984, + "grad_norm": 0.6827111840248108, + "learning_rate": 1.966314241295254e-06, + "loss": 0.1405, + "num_input_tokens_seen": 3253312, + "step": 6615 + }, + { + "epoch": 0.8736967137389469, + "grad_norm": 0.03325214982032776, + "learning_rate": 1.9661955711406808e-06, + "loss": 0.1581, + "num_input_tokens_seen": 3255488, + "step": 6620 + }, + { + "epoch": 0.8743566055166953, + "grad_norm": 0.028142258524894714, + "learning_rate": 1.966076695918094e-06, + "loss": 0.0712, + "num_input_tokens_seen": 3257664, + "step": 6625 + }, + { + "epoch": 0.8750164972944438, + "grad_norm": 0.07260460406541824, + "learning_rate": 1.9659576156527236e-06, + "loss": 0.0422, + "num_input_tokens_seen": 3260160, + "step": 6630 + }, + { + "epoch": 0.8756763890721921, + "grad_norm": 21.091014862060547, + "learning_rate": 1.965838330369845e-06, + "loss": 0.15, + "num_input_tokens_seen": 3262528, + "step": 6635 + }, + { + "epoch": 0.8763362808499406, + "grad_norm": 0.024985164403915405, + "learning_rate": 1.9657188400947748e-06, + "loss": 0.099, + "num_input_tokens_seen": 3265024, + "step": 6640 + }, + { + "epoch": 0.876996172627689, + "grad_norm": 0.18783406913280487, + "learning_rate": 1.965599144852874e-06, + "loss": 0.2838, + "num_input_tokens_seen": 3267456, + "step": 6645 + }, + { + "epoch": 0.8776560644054375, + "grad_norm": 0.43716302514076233, + "learning_rate": 1.9654792446695467e-06, + "loss": 0.0717, + "num_input_tokens_seen": 3270208, + "step": 6650 + }, + { + "epoch": 0.8783159561831859, + "grad_norm": 35.399932861328125, + "learning_rate": 1.9653591395702408e-06, + "loss": 0.1191, + "num_input_tokens_seen": 3272960, + "step": 6655 + }, + { + "epoch": 0.8789758479609344, + "grad_norm": 0.5945919752120972, + "learning_rate": 1.9652388295804484e-06, + "loss": 0.1331, + "num_input_tokens_seen": 3275136, + "step": 6660 + }, + { + "epoch": 0.8796357397386828, + "grad_norm": 29.954280853271484, + "learning_rate": 1.9651183147257046e-06, + "loss": 0.2028, + "num_input_tokens_seen": 3277696, + "step": 6665 + }, + { + "epoch": 0.8802956315164313, + "grad_norm": 12.914910316467285, + "learning_rate": 1.964997595031587e-06, + "loss": 0.1612, + "num_input_tokens_seen": 3280064, + "step": 6670 + }, + { + "epoch": 0.8809555232941798, + "grad_norm": 0.618593156337738, + "learning_rate": 1.964876670523718e-06, + "loss": 0.185, + "num_input_tokens_seen": 3282304, + "step": 6675 + }, + { + "epoch": 0.8816154150719282, + "grad_norm": 0.3972916007041931, + "learning_rate": 1.9647555412277623e-06, + "loss": 0.102, + "num_input_tokens_seen": 3284736, + "step": 6680 + }, + { + "epoch": 0.8822753068496767, + "grad_norm": 56.937686920166016, + "learning_rate": 1.9646342071694298e-06, + "loss": 0.0322, + "num_input_tokens_seen": 3287168, + "step": 6685 + }, + { + "epoch": 0.8829351986274251, + "grad_norm": 32.125244140625, + "learning_rate": 1.9645126683744718e-06, + "loss": 0.1026, + "num_input_tokens_seen": 3289600, + "step": 6690 + }, + { + "epoch": 0.8835950904051736, + "grad_norm": 0.03807664290070534, + "learning_rate": 1.9643909248686847e-06, + "loss": 0.0023, + "num_input_tokens_seen": 3292160, + "step": 6695 + }, + { + "epoch": 0.884254982182922, + "grad_norm": 12.510610580444336, + "learning_rate": 1.964268976677907e-06, + "loss": 0.1903, + "num_input_tokens_seen": 3294592, + "step": 6700 + }, + { + "epoch": 0.8849148739606705, + "grad_norm": 39.68159103393555, + "learning_rate": 1.964146823828022e-06, + "loss": 0.0425, + "num_input_tokens_seen": 3296960, + "step": 6705 + }, + { + "epoch": 0.8855747657384189, + "grad_norm": 0.1999286711215973, + "learning_rate": 1.9640244663449548e-06, + "loss": 0.035, + "num_input_tokens_seen": 3299200, + "step": 6710 + }, + { + "epoch": 0.8862346575161674, + "grad_norm": 10.900226593017578, + "learning_rate": 1.963901904254676e-06, + "loss": 0.2501, + "num_input_tokens_seen": 3301568, + "step": 6715 + }, + { + "epoch": 0.8868945492939158, + "grad_norm": 17.2080135345459, + "learning_rate": 1.963779137583198e-06, + "loss": 0.1129, + "num_input_tokens_seen": 3304064, + "step": 6720 + }, + { + "epoch": 0.8875544410716643, + "grad_norm": 0.17219261825084686, + "learning_rate": 1.963656166356577e-06, + "loss": 0.1272, + "num_input_tokens_seen": 3306432, + "step": 6725 + }, + { + "epoch": 0.8882143328494126, + "grad_norm": 1.6587921380996704, + "learning_rate": 1.9635329906009135e-06, + "loss": 0.1033, + "num_input_tokens_seen": 3308736, + "step": 6730 + }, + { + "epoch": 0.8888742246271611, + "grad_norm": 0.29581812024116516, + "learning_rate": 1.96340961034235e-06, + "loss": 0.0311, + "num_input_tokens_seen": 3311168, + "step": 6735 + }, + { + "epoch": 0.8895341164049096, + "grad_norm": 34.09101867675781, + "learning_rate": 1.9632860256070727e-06, + "loss": 0.1654, + "num_input_tokens_seen": 3313664, + "step": 6740 + }, + { + "epoch": 0.890194008182658, + "grad_norm": 20.558460235595703, + "learning_rate": 1.9631622364213124e-06, + "loss": 0.1481, + "num_input_tokens_seen": 3316224, + "step": 6745 + }, + { + "epoch": 0.8908538999604065, + "grad_norm": 108.66929626464844, + "learning_rate": 1.9630382428113416e-06, + "loss": 0.0998, + "num_input_tokens_seen": 3318464, + "step": 6750 + }, + { + "epoch": 0.8915137917381549, + "grad_norm": 0.1910923272371292, + "learning_rate": 1.962914044803478e-06, + "loss": 0.0018, + "num_input_tokens_seen": 3320896, + "step": 6755 + }, + { + "epoch": 0.8921736835159034, + "grad_norm": 0.1292416900396347, + "learning_rate": 1.9627896424240814e-06, + "loss": 0.1516, + "num_input_tokens_seen": 3323648, + "step": 6760 + }, + { + "epoch": 0.8928335752936518, + "grad_norm": 21.599706649780273, + "learning_rate": 1.9626650356995545e-06, + "loss": 0.2309, + "num_input_tokens_seen": 3326208, + "step": 6765 + }, + { + "epoch": 0.8934934670714003, + "grad_norm": 0.48037469387054443, + "learning_rate": 1.9625402246563456e-06, + "loss": 0.1373, + "num_input_tokens_seen": 3328576, + "step": 6770 + }, + { + "epoch": 0.8941533588491487, + "grad_norm": 0.28065234422683716, + "learning_rate": 1.962415209320944e-06, + "loss": 0.0354, + "num_input_tokens_seen": 3331520, + "step": 6775 + }, + { + "epoch": 0.8948132506268972, + "grad_norm": 0.31453850865364075, + "learning_rate": 1.9622899897198834e-06, + "loss": 0.0489, + "num_input_tokens_seen": 3334336, + "step": 6780 + }, + { + "epoch": 0.8954731424046456, + "grad_norm": 69.05518341064453, + "learning_rate": 1.962164565879741e-06, + "loss": 0.1136, + "num_input_tokens_seen": 3336896, + "step": 6785 + }, + { + "epoch": 0.8961330341823941, + "grad_norm": 16.333518981933594, + "learning_rate": 1.9620389378271363e-06, + "loss": 0.1573, + "num_input_tokens_seen": 3339328, + "step": 6790 + }, + { + "epoch": 0.8967929259601425, + "grad_norm": 0.5106508135795593, + "learning_rate": 1.9619131055887343e-06, + "loss": 0.0079, + "num_input_tokens_seen": 3341760, + "step": 6795 + }, + { + "epoch": 0.897452817737891, + "grad_norm": 0.23962059617042542, + "learning_rate": 1.961787069191241e-06, + "loss": 0.1041, + "num_input_tokens_seen": 3344448, + "step": 6800 + }, + { + "epoch": 0.8981127095156395, + "grad_norm": 0.08097890764474869, + "learning_rate": 1.9616608286614065e-06, + "loss": 0.0233, + "num_input_tokens_seen": 3347008, + "step": 6805 + }, + { + "epoch": 0.8987726012933879, + "grad_norm": 0.9426233768463135, + "learning_rate": 1.9615343840260255e-06, + "loss": 0.0408, + "num_input_tokens_seen": 3349824, + "step": 6810 + }, + { + "epoch": 0.8994324930711364, + "grad_norm": 0.28335756063461304, + "learning_rate": 1.9614077353119345e-06, + "loss": 0.0705, + "num_input_tokens_seen": 3352320, + "step": 6815 + }, + { + "epoch": 0.9000923848488848, + "grad_norm": 0.1531563252210617, + "learning_rate": 1.961280882546013e-06, + "loss": 0.0009, + "num_input_tokens_seen": 3354688, + "step": 6820 + }, + { + "epoch": 0.9007522766266333, + "grad_norm": 0.014378640800714493, + "learning_rate": 1.961153825755186e-06, + "loss": 0.0692, + "num_input_tokens_seen": 3357056, + "step": 6825 + }, + { + "epoch": 0.9014121684043817, + "grad_norm": 0.24212691187858582, + "learning_rate": 1.961026564966419e-06, + "loss": 0.0761, + "num_input_tokens_seen": 3359488, + "step": 6830 + }, + { + "epoch": 0.9020720601821302, + "grad_norm": 0.04449746385216713, + "learning_rate": 1.9608991002067233e-06, + "loss": 0.3297, + "num_input_tokens_seen": 3361920, + "step": 6835 + }, + { + "epoch": 0.9027319519598785, + "grad_norm": 0.47794926166534424, + "learning_rate": 1.9607714315031513e-06, + "loss": 0.0016, + "num_input_tokens_seen": 3364416, + "step": 6840 + }, + { + "epoch": 0.903391843737627, + "grad_norm": 0.08138076961040497, + "learning_rate": 1.9606435588828008e-06, + "loss": 0.1103, + "num_input_tokens_seen": 3366912, + "step": 6845 + }, + { + "epoch": 0.9040517355153754, + "grad_norm": 0.005542756523936987, + "learning_rate": 1.960515482372811e-06, + "loss": 0.0008, + "num_input_tokens_seen": 3369088, + "step": 6850 + }, + { + "epoch": 0.9047116272931239, + "grad_norm": 0.005087740253657103, + "learning_rate": 1.960387202000366e-06, + "loss": 0.2938, + "num_input_tokens_seen": 3371520, + "step": 6855 + }, + { + "epoch": 0.9053715190708723, + "grad_norm": 0.011929133906960487, + "learning_rate": 1.9602587177926913e-06, + "loss": 0.0004, + "num_input_tokens_seen": 3374080, + "step": 6860 + }, + { + "epoch": 0.9060314108486208, + "grad_norm": 16.993408203125, + "learning_rate": 1.960130029777058e-06, + "loss": 0.0758, + "num_input_tokens_seen": 3376640, + "step": 6865 + }, + { + "epoch": 0.9066913026263693, + "grad_norm": 0.02640739642083645, + "learning_rate": 1.9600011379807783e-06, + "loss": 0.0005, + "num_input_tokens_seen": 3379072, + "step": 6870 + }, + { + "epoch": 0.9073511944041177, + "grad_norm": 5.115023612976074, + "learning_rate": 1.9598720424312093e-06, + "loss": 0.05, + "num_input_tokens_seen": 3381696, + "step": 6875 + }, + { + "epoch": 0.9080110861818662, + "grad_norm": 11.82519817352295, + "learning_rate": 1.9597427431557497e-06, + "loss": 0.317, + "num_input_tokens_seen": 3384064, + "step": 6880 + }, + { + "epoch": 0.9086709779596146, + "grad_norm": 0.007930797524750233, + "learning_rate": 1.9596132401818427e-06, + "loss": 0.1413, + "num_input_tokens_seen": 3386304, + "step": 6885 + }, + { + "epoch": 0.9093308697373631, + "grad_norm": 0.23658303916454315, + "learning_rate": 1.9594835335369748e-06, + "loss": 0.078, + "num_input_tokens_seen": 3388800, + "step": 6890 + }, + { + "epoch": 0.9099907615151115, + "grad_norm": 0.07035399973392487, + "learning_rate": 1.9593536232486747e-06, + "loss": 0.1664, + "num_input_tokens_seen": 3391232, + "step": 6895 + }, + { + "epoch": 0.91065065329286, + "grad_norm": 20.818607330322266, + "learning_rate": 1.9592235093445153e-06, + "loss": 0.0852, + "num_input_tokens_seen": 3393664, + "step": 6900 + }, + { + "epoch": 0.9113105450706084, + "grad_norm": 57.55811309814453, + "learning_rate": 1.959093191852112e-06, + "loss": 0.1319, + "num_input_tokens_seen": 3395968, + "step": 6905 + }, + { + "epoch": 0.9119704368483569, + "grad_norm": 0.08466242253780365, + "learning_rate": 1.958962670799124e-06, + "loss": 0.1763, + "num_input_tokens_seen": 3398272, + "step": 6910 + }, + { + "epoch": 0.9126303286261053, + "grad_norm": 12.08685302734375, + "learning_rate": 1.9588319462132535e-06, + "loss": 0.2054, + "num_input_tokens_seen": 3400960, + "step": 6915 + }, + { + "epoch": 0.9132902204038538, + "grad_norm": 77.93342590332031, + "learning_rate": 1.9587010181222456e-06, + "loss": 0.2306, + "num_input_tokens_seen": 3403520, + "step": 6920 + }, + { + "epoch": 0.9139501121816023, + "grad_norm": 480.17840576171875, + "learning_rate": 1.9585698865538892e-06, + "loss": 0.2867, + "num_input_tokens_seen": 3405952, + "step": 6925 + }, + { + "epoch": 0.9146100039593507, + "grad_norm": 68.2589111328125, + "learning_rate": 1.9584385515360155e-06, + "loss": 0.1133, + "num_input_tokens_seen": 3408320, + "step": 6930 + }, + { + "epoch": 0.9152698957370992, + "grad_norm": 0.36150798201560974, + "learning_rate": 1.9583070130965e-06, + "loss": 0.0866, + "num_input_tokens_seen": 3410880, + "step": 6935 + }, + { + "epoch": 0.9159297875148475, + "grad_norm": 0.438012033700943, + "learning_rate": 1.95817527126326e-06, + "loss": 0.0082, + "num_input_tokens_seen": 3413440, + "step": 6940 + }, + { + "epoch": 0.916589679292596, + "grad_norm": 2.9928126335144043, + "learning_rate": 1.9580433260642576e-06, + "loss": 0.1116, + "num_input_tokens_seen": 3416000, + "step": 6945 + }, + { + "epoch": 0.9172495710703444, + "grad_norm": 13.603625297546387, + "learning_rate": 1.9579111775274967e-06, + "loss": 0.1138, + "num_input_tokens_seen": 3418240, + "step": 6950 + }, + { + "epoch": 0.9179094628480929, + "grad_norm": 0.3827645778656006, + "learning_rate": 1.957778825681025e-06, + "loss": 0.1691, + "num_input_tokens_seen": 3420672, + "step": 6955 + }, + { + "epoch": 0.9185693546258413, + "grad_norm": 7.339386463165283, + "learning_rate": 1.9576462705529334e-06, + "loss": 0.0336, + "num_input_tokens_seen": 3422912, + "step": 6960 + }, + { + "epoch": 0.9192292464035898, + "grad_norm": 16.184080123901367, + "learning_rate": 1.9575135121713554e-06, + "loss": 0.0039, + "num_input_tokens_seen": 3425408, + "step": 6965 + }, + { + "epoch": 0.9198891381813382, + "grad_norm": 18.327194213867188, + "learning_rate": 1.9573805505644687e-06, + "loss": 0.0885, + "num_input_tokens_seen": 3427776, + "step": 6970 + }, + { + "epoch": 0.9205490299590867, + "grad_norm": 15.888535499572754, + "learning_rate": 1.9572473857604924e-06, + "loss": 0.1885, + "num_input_tokens_seen": 3430336, + "step": 6975 + }, + { + "epoch": 0.9212089217368351, + "grad_norm": 52.742218017578125, + "learning_rate": 1.9571140177876904e-06, + "loss": 0.2446, + "num_input_tokens_seen": 3432896, + "step": 6980 + }, + { + "epoch": 0.9218688135145836, + "grad_norm": 0.11979825794696808, + "learning_rate": 1.956980446674369e-06, + "loss": 0.0608, + "num_input_tokens_seen": 3435136, + "step": 6985 + }, + { + "epoch": 0.9225287052923321, + "grad_norm": 1.5458872318267822, + "learning_rate": 1.9568466724488783e-06, + "loss": 0.0706, + "num_input_tokens_seen": 3437824, + "step": 6990 + }, + { + "epoch": 0.9231885970700805, + "grad_norm": 0.07339257746934891, + "learning_rate": 1.95671269513961e-06, + "loss": 0.0195, + "num_input_tokens_seen": 3440320, + "step": 6995 + }, + { + "epoch": 0.923848488847829, + "grad_norm": 0.3749296963214874, + "learning_rate": 1.9565785147749994e-06, + "loss": 0.1083, + "num_input_tokens_seen": 3442880, + "step": 7000 + }, + { + "epoch": 0.9245083806255774, + "grad_norm": 0.0999370887875557, + "learning_rate": 1.956444131383527e-06, + "loss": 0.0438, + "num_input_tokens_seen": 3445120, + "step": 7005 + }, + { + "epoch": 0.9251682724033259, + "grad_norm": 0.2530590891838074, + "learning_rate": 1.9563095449937133e-06, + "loss": 0.1449, + "num_input_tokens_seen": 3447424, + "step": 7010 + }, + { + "epoch": 0.9258281641810743, + "grad_norm": 0.015032319352030754, + "learning_rate": 1.9561747556341236e-06, + "loss": 0.0746, + "num_input_tokens_seen": 3449920, + "step": 7015 + }, + { + "epoch": 0.9264880559588228, + "grad_norm": 1.8766849040985107, + "learning_rate": 1.9560397633333663e-06, + "loss": 0.0844, + "num_input_tokens_seen": 3452416, + "step": 7020 + }, + { + "epoch": 0.9271479477365712, + "grad_norm": 15.20991039276123, + "learning_rate": 1.955904568120092e-06, + "loss": 0.1329, + "num_input_tokens_seen": 3454912, + "step": 7025 + }, + { + "epoch": 0.9278078395143197, + "grad_norm": 0.08727604895830154, + "learning_rate": 1.955769170022996e-06, + "loss": 0.0823, + "num_input_tokens_seen": 3457472, + "step": 7030 + }, + { + "epoch": 0.928467731292068, + "grad_norm": 0.3139757215976715, + "learning_rate": 1.955633569070814e-06, + "loss": 0.0728, + "num_input_tokens_seen": 3459712, + "step": 7035 + }, + { + "epoch": 0.9291276230698166, + "grad_norm": 0.4277566373348236, + "learning_rate": 1.9554977652923276e-06, + "loss": 0.1126, + "num_input_tokens_seen": 3462144, + "step": 7040 + }, + { + "epoch": 0.9297875148475649, + "grad_norm": 12.390519142150879, + "learning_rate": 1.9553617587163594e-06, + "loss": 0.2659, + "num_input_tokens_seen": 3464512, + "step": 7045 + }, + { + "epoch": 0.9304474066253134, + "grad_norm": 0.03325313329696655, + "learning_rate": 1.955225549371776e-06, + "loss": 0.0937, + "num_input_tokens_seen": 3466880, + "step": 7050 + }, + { + "epoch": 0.931107298403062, + "grad_norm": 0.5559648871421814, + "learning_rate": 1.9550891372874872e-06, + "loss": 0.1008, + "num_input_tokens_seen": 3469248, + "step": 7055 + }, + { + "epoch": 0.9317671901808103, + "grad_norm": 38.42670440673828, + "learning_rate": 1.9549525224924453e-06, + "loss": 0.2362, + "num_input_tokens_seen": 3471616, + "step": 7060 + }, + { + "epoch": 0.9324270819585588, + "grad_norm": 1.161107063293457, + "learning_rate": 1.9548157050156456e-06, + "loss": 0.0761, + "num_input_tokens_seen": 3474240, + "step": 7065 + }, + { + "epoch": 0.9330869737363072, + "grad_norm": 0.5504103899002075, + "learning_rate": 1.9546786848861268e-06, + "loss": 0.0566, + "num_input_tokens_seen": 3476800, + "step": 7070 + }, + { + "epoch": 0.9337468655140557, + "grad_norm": 0.2676885724067688, + "learning_rate": 1.95454146213297e-06, + "loss": 0.091, + "num_input_tokens_seen": 3479488, + "step": 7075 + }, + { + "epoch": 0.9344067572918041, + "grad_norm": 0.022085441276431084, + "learning_rate": 1.954404036785301e-06, + "loss": 0.0015, + "num_input_tokens_seen": 3482176, + "step": 7080 + }, + { + "epoch": 0.9350666490695526, + "grad_norm": 0.4007412791252136, + "learning_rate": 1.9542664088722857e-06, + "loss": 0.078, + "num_input_tokens_seen": 3484800, + "step": 7085 + }, + { + "epoch": 0.935726540847301, + "grad_norm": 0.11671361327171326, + "learning_rate": 1.9541285784231355e-06, + "loss": 0.1244, + "num_input_tokens_seen": 3487488, + "step": 7090 + }, + { + "epoch": 0.9363864326250495, + "grad_norm": 12.453141212463379, + "learning_rate": 1.9539905454671037e-06, + "loss": 0.3198, + "num_input_tokens_seen": 3489728, + "step": 7095 + }, + { + "epoch": 0.9370463244027979, + "grad_norm": 0.04366743937134743, + "learning_rate": 1.953852310033487e-06, + "loss": 0.2493, + "num_input_tokens_seen": 3491904, + "step": 7100 + }, + { + "epoch": 0.9377062161805464, + "grad_norm": 51.18413162231445, + "learning_rate": 1.9537138721516248e-06, + "loss": 0.059, + "num_input_tokens_seen": 3494592, + "step": 7105 + }, + { + "epoch": 0.9383661079582949, + "grad_norm": 0.18797965347766876, + "learning_rate": 1.9535752318508995e-06, + "loss": 0.1345, + "num_input_tokens_seen": 3497088, + "step": 7110 + }, + { + "epoch": 0.9390259997360433, + "grad_norm": 0.30258113145828247, + "learning_rate": 1.9534363891607363e-06, + "loss": 0.0865, + "num_input_tokens_seen": 3499520, + "step": 7115 + }, + { + "epoch": 0.9396858915137918, + "grad_norm": 0.14273835718631744, + "learning_rate": 1.953297344110604e-06, + "loss": 0.1349, + "num_input_tokens_seen": 3502208, + "step": 7120 + }, + { + "epoch": 0.9403457832915402, + "grad_norm": 11.008658409118652, + "learning_rate": 1.9531580967300135e-06, + "loss": 0.1946, + "num_input_tokens_seen": 3504640, + "step": 7125 + }, + { + "epoch": 0.9410056750692887, + "grad_norm": 2.5081124305725098, + "learning_rate": 1.953018647048519e-06, + "loss": 0.0946, + "num_input_tokens_seen": 3506944, + "step": 7130 + }, + { + "epoch": 0.9416655668470371, + "grad_norm": 20.469833374023438, + "learning_rate": 1.9528789950957182e-06, + "loss": 0.2065, + "num_input_tokens_seen": 3509376, + "step": 7135 + }, + { + "epoch": 0.9423254586247856, + "grad_norm": 23.552812576293945, + "learning_rate": 1.9527391409012507e-06, + "loss": 0.096, + "num_input_tokens_seen": 3511680, + "step": 7140 + }, + { + "epoch": 0.942985350402534, + "grad_norm": 251.3920440673828, + "learning_rate": 1.9525990844948e-06, + "loss": 0.0617, + "num_input_tokens_seen": 3514112, + "step": 7145 + }, + { + "epoch": 0.9436452421802825, + "grad_norm": 0.5825870633125305, + "learning_rate": 1.952458825906092e-06, + "loss": 0.0848, + "num_input_tokens_seen": 3516480, + "step": 7150 + }, + { + "epoch": 0.9443051339580308, + "grad_norm": 0.27023938298225403, + "learning_rate": 1.952318365164895e-06, + "loss": 0.1279, + "num_input_tokens_seen": 3518720, + "step": 7155 + }, + { + "epoch": 0.9449650257357793, + "grad_norm": 0.17718826234340668, + "learning_rate": 1.952177702301021e-06, + "loss": 0.0011, + "num_input_tokens_seen": 3521216, + "step": 7160 + }, + { + "epoch": 0.9456249175135277, + "grad_norm": 13.897564888000488, + "learning_rate": 1.9520368373443246e-06, + "loss": 0.2427, + "num_input_tokens_seen": 3523776, + "step": 7165 + }, + { + "epoch": 0.9462848092912762, + "grad_norm": 117.28556823730469, + "learning_rate": 1.951895770324704e-06, + "loss": 0.0515, + "num_input_tokens_seen": 3526272, + "step": 7170 + }, + { + "epoch": 0.9469447010690247, + "grad_norm": 1.6187975406646729, + "learning_rate": 1.9517545012720993e-06, + "loss": 0.1211, + "num_input_tokens_seen": 3528576, + "step": 7175 + }, + { + "epoch": 0.9476045928467731, + "grad_norm": 1.0781744718551636, + "learning_rate": 1.9516130302164937e-06, + "loss": 0.0018, + "num_input_tokens_seen": 3531136, + "step": 7180 + }, + { + "epoch": 0.9482644846245216, + "grad_norm": 13.055898666381836, + "learning_rate": 1.9514713571879135e-06, + "loss": 0.2951, + "num_input_tokens_seen": 3533696, + "step": 7185 + }, + { + "epoch": 0.94892437640227, + "grad_norm": 0.1939694732427597, + "learning_rate": 1.9513294822164274e-06, + "loss": 0.0036, + "num_input_tokens_seen": 3536064, + "step": 7190 + }, + { + "epoch": 0.9495842681800185, + "grad_norm": 1.0631992816925049, + "learning_rate": 1.9511874053321483e-06, + "loss": 0.0063, + "num_input_tokens_seen": 3538432, + "step": 7195 + }, + { + "epoch": 0.9502441599577669, + "grad_norm": 0.25243115425109863, + "learning_rate": 1.95104512656523e-06, + "loss": 0.0012, + "num_input_tokens_seen": 3541120, + "step": 7200 + }, + { + "epoch": 0.9509040517355154, + "grad_norm": 60.04290771484375, + "learning_rate": 1.9509026459458702e-06, + "loss": 0.0973, + "num_input_tokens_seen": 3543680, + "step": 7205 + }, + { + "epoch": 0.9515639435132638, + "grad_norm": 40.85661697387695, + "learning_rate": 1.95075996350431e-06, + "loss": 0.2608, + "num_input_tokens_seen": 3545984, + "step": 7210 + }, + { + "epoch": 0.9522238352910123, + "grad_norm": 14.948753356933594, + "learning_rate": 1.9506170792708327e-06, + "loss": 0.0943, + "num_input_tokens_seen": 3548544, + "step": 7215 + }, + { + "epoch": 0.9528837270687607, + "grad_norm": 0.013944868929684162, + "learning_rate": 1.950473993275764e-06, + "loss": 0.0524, + "num_input_tokens_seen": 3551040, + "step": 7220 + }, + { + "epoch": 0.9535436188465092, + "grad_norm": 0.06707829982042313, + "learning_rate": 1.950330705549473e-06, + "loss": 0.1268, + "num_input_tokens_seen": 3553536, + "step": 7225 + }, + { + "epoch": 0.9542035106242576, + "grad_norm": 0.03719603642821312, + "learning_rate": 1.950187216122371e-06, + "loss": 0.1104, + "num_input_tokens_seen": 3555712, + "step": 7230 + }, + { + "epoch": 0.9548634024020061, + "grad_norm": 0.2717171609401703, + "learning_rate": 1.9500435250249136e-06, + "loss": 0.1443, + "num_input_tokens_seen": 3558080, + "step": 7235 + }, + { + "epoch": 0.9555232941797546, + "grad_norm": 133.826171875, + "learning_rate": 1.949899632287598e-06, + "loss": 0.1269, + "num_input_tokens_seen": 3560640, + "step": 7240 + }, + { + "epoch": 0.956183185957503, + "grad_norm": 38.52214431762695, + "learning_rate": 1.9497555379409633e-06, + "loss": 0.0389, + "num_input_tokens_seen": 3563392, + "step": 7245 + }, + { + "epoch": 0.9568430777352515, + "grad_norm": 14.870156288146973, + "learning_rate": 1.9496112420155937e-06, + "loss": 0.309, + "num_input_tokens_seen": 3565824, + "step": 7250 + }, + { + "epoch": 0.9575029695129998, + "grad_norm": 0.18774457275867462, + "learning_rate": 1.949466744542115e-06, + "loss": 0.0023, + "num_input_tokens_seen": 3568256, + "step": 7255 + }, + { + "epoch": 0.9581628612907483, + "grad_norm": 19.96299171447754, + "learning_rate": 1.9493220455511943e-06, + "loss": 0.0659, + "num_input_tokens_seen": 3570752, + "step": 7260 + }, + { + "epoch": 0.9588227530684967, + "grad_norm": 7.811553955078125, + "learning_rate": 1.9491771450735444e-06, + "loss": 0.1136, + "num_input_tokens_seen": 3572928, + "step": 7265 + }, + { + "epoch": 0.9594826448462452, + "grad_norm": 25.050922393798828, + "learning_rate": 1.9490320431399186e-06, + "loss": 0.2459, + "num_input_tokens_seen": 3575296, + "step": 7270 + }, + { + "epoch": 0.9601425366239936, + "grad_norm": 0.148186594247818, + "learning_rate": 1.9488867397811143e-06, + "loss": 0.0014, + "num_input_tokens_seen": 3577664, + "step": 7275 + }, + { + "epoch": 0.9608024284017421, + "grad_norm": 0.40033382177352905, + "learning_rate": 1.948741235027971e-06, + "loss": 0.1765, + "num_input_tokens_seen": 3580160, + "step": 7280 + }, + { + "epoch": 0.9614623201794905, + "grad_norm": 87.95939636230469, + "learning_rate": 1.9485955289113703e-06, + "loss": 0.1181, + "num_input_tokens_seen": 3582464, + "step": 7285 + }, + { + "epoch": 0.962122211957239, + "grad_norm": 1.9860565662384033, + "learning_rate": 1.9484496214622375e-06, + "loss": 0.1001, + "num_input_tokens_seen": 3584896, + "step": 7290 + }, + { + "epoch": 0.9627821037349875, + "grad_norm": 0.29375576972961426, + "learning_rate": 1.9483035127115416e-06, + "loss": 0.0035, + "num_input_tokens_seen": 3587584, + "step": 7295 + }, + { + "epoch": 0.9634419955127359, + "grad_norm": 14.525373458862305, + "learning_rate": 1.948157202690292e-06, + "loss": 0.149, + "num_input_tokens_seen": 3590144, + "step": 7300 + }, + { + "epoch": 0.9641018872904844, + "grad_norm": 0.25842082500457764, + "learning_rate": 1.9480106914295416e-06, + "loss": 0.0409, + "num_input_tokens_seen": 3592832, + "step": 7305 + }, + { + "epoch": 0.9647617790682328, + "grad_norm": 0.6491772532463074, + "learning_rate": 1.947863978960387e-06, + "loss": 0.1264, + "num_input_tokens_seen": 3595456, + "step": 7310 + }, + { + "epoch": 0.9654216708459813, + "grad_norm": 0.36299997568130493, + "learning_rate": 1.947717065313967e-06, + "loss": 0.1478, + "num_input_tokens_seen": 3597888, + "step": 7315 + }, + { + "epoch": 0.9660815626237297, + "grad_norm": 18.739669799804688, + "learning_rate": 1.9475699505214625e-06, + "loss": 0.0841, + "num_input_tokens_seen": 3600384, + "step": 7320 + }, + { + "epoch": 0.9667414544014782, + "grad_norm": 13.522602081298828, + "learning_rate": 1.947422634614098e-06, + "loss": 0.183, + "num_input_tokens_seen": 3602880, + "step": 7325 + }, + { + "epoch": 0.9674013461792266, + "grad_norm": 0.14512999355793, + "learning_rate": 1.94727511762314e-06, + "loss": 0.0148, + "num_input_tokens_seen": 3605248, + "step": 7330 + }, + { + "epoch": 0.9680612379569751, + "grad_norm": 0.26263144612312317, + "learning_rate": 1.9471273995798977e-06, + "loss": 0.0009, + "num_input_tokens_seen": 3607808, + "step": 7335 + }, + { + "epoch": 0.9687211297347235, + "grad_norm": 12.533109664916992, + "learning_rate": 1.9469794805157235e-06, + "loss": 0.2154, + "num_input_tokens_seen": 3610112, + "step": 7340 + }, + { + "epoch": 0.969381021512472, + "grad_norm": 24.23501968383789, + "learning_rate": 1.946831360462012e-06, + "loss": 0.1076, + "num_input_tokens_seen": 3612352, + "step": 7345 + }, + { + "epoch": 0.9700409132902204, + "grad_norm": 15.151259422302246, + "learning_rate": 1.946683039450201e-06, + "loss": 0.184, + "num_input_tokens_seen": 3614848, + "step": 7350 + }, + { + "epoch": 0.9707008050679689, + "grad_norm": 0.8318037390708923, + "learning_rate": 1.9465345175117698e-06, + "loss": 0.0021, + "num_input_tokens_seen": 3617408, + "step": 7355 + }, + { + "epoch": 0.9713606968457174, + "grad_norm": 223.77159118652344, + "learning_rate": 1.9463857946782418e-06, + "loss": 0.1402, + "num_input_tokens_seen": 3619968, + "step": 7360 + }, + { + "epoch": 0.9720205886234657, + "grad_norm": 2.1519546508789062, + "learning_rate": 1.9462368709811816e-06, + "loss": 0.1302, + "num_input_tokens_seen": 3622016, + "step": 7365 + }, + { + "epoch": 0.9726804804012142, + "grad_norm": 13.423410415649414, + "learning_rate": 1.946087746452198e-06, + "loss": 0.115, + "num_input_tokens_seen": 3624192, + "step": 7370 + }, + { + "epoch": 0.9733403721789626, + "grad_norm": 0.11041421443223953, + "learning_rate": 1.945938421122941e-06, + "loss": 0.0111, + "num_input_tokens_seen": 3626624, + "step": 7375 + }, + { + "epoch": 0.9740002639567111, + "grad_norm": 14.675050735473633, + "learning_rate": 1.9457888950251045e-06, + "loss": 0.1937, + "num_input_tokens_seen": 3628928, + "step": 7380 + }, + { + "epoch": 0.9746601557344595, + "grad_norm": 0.20445093512535095, + "learning_rate": 1.9456391681904234e-06, + "loss": 0.1085, + "num_input_tokens_seen": 3631552, + "step": 7385 + }, + { + "epoch": 0.975320047512208, + "grad_norm": 15.725769996643066, + "learning_rate": 1.9454892406506774e-06, + "loss": 0.078, + "num_input_tokens_seen": 3633984, + "step": 7390 + }, + { + "epoch": 0.9759799392899564, + "grad_norm": 1.7748395204544067, + "learning_rate": 1.945339112437686e-06, + "loss": 0.2813, + "num_input_tokens_seen": 3636224, + "step": 7395 + }, + { + "epoch": 0.9766398310677049, + "grad_norm": 0.23947864770889282, + "learning_rate": 1.945188783583314e-06, + "loss": 0.1583, + "num_input_tokens_seen": 3638656, + "step": 7400 + }, + { + "epoch": 0.9772997228454533, + "grad_norm": 0.18196658790111542, + "learning_rate": 1.945038254119467e-06, + "loss": 0.0369, + "num_input_tokens_seen": 3641408, + "step": 7405 + }, + { + "epoch": 0.9779596146232018, + "grad_norm": 13.365228652954102, + "learning_rate": 1.944887524078094e-06, + "loss": 0.133, + "num_input_tokens_seen": 3643840, + "step": 7410 + }, + { + "epoch": 0.9786195064009502, + "grad_norm": 0.32755666971206665, + "learning_rate": 1.9447365934911862e-06, + "loss": 0.058, + "num_input_tokens_seen": 3646336, + "step": 7415 + }, + { + "epoch": 0.9792793981786987, + "grad_norm": 0.40165284276008606, + "learning_rate": 1.944585462390778e-06, + "loss": 0.086, + "num_input_tokens_seen": 3648960, + "step": 7420 + }, + { + "epoch": 0.9799392899564472, + "grad_norm": 24.77637481689453, + "learning_rate": 1.9444341308089456e-06, + "loss": 0.0681, + "num_input_tokens_seen": 3651200, + "step": 7425 + }, + { + "epoch": 0.9805991817341956, + "grad_norm": 0.17811863124370575, + "learning_rate": 1.944282598777808e-06, + "loss": 0.0279, + "num_input_tokens_seen": 3653504, + "step": 7430 + }, + { + "epoch": 0.9812590735119441, + "grad_norm": 24.5549373626709, + "learning_rate": 1.9441308663295264e-06, + "loss": 0.3589, + "num_input_tokens_seen": 3656064, + "step": 7435 + }, + { + "epoch": 0.9819189652896925, + "grad_norm": 44.16228485107422, + "learning_rate": 1.9439789334963055e-06, + "loss": 0.4366, + "num_input_tokens_seen": 3658112, + "step": 7440 + }, + { + "epoch": 0.982578857067441, + "grad_norm": 30.133174896240234, + "learning_rate": 1.9438268003103916e-06, + "loss": 0.1661, + "num_input_tokens_seen": 3660928, + "step": 7445 + }, + { + "epoch": 0.9832387488451894, + "grad_norm": 0.4015233814716339, + "learning_rate": 1.943674466804074e-06, + "loss": 0.1425, + "num_input_tokens_seen": 3663232, + "step": 7450 + }, + { + "epoch": 0.9838986406229379, + "grad_norm": 0.3251698315143585, + "learning_rate": 1.9435219330096845e-06, + "loss": 0.0064, + "num_input_tokens_seen": 3665600, + "step": 7455 + }, + { + "epoch": 0.9845585324006862, + "grad_norm": 0.10743151605129242, + "learning_rate": 1.9433691989595975e-06, + "loss": 0.0393, + "num_input_tokens_seen": 3668096, + "step": 7460 + }, + { + "epoch": 0.9852184241784347, + "grad_norm": 0.22793497145175934, + "learning_rate": 1.943216264686229e-06, + "loss": 0.0312, + "num_input_tokens_seen": 3670656, + "step": 7465 + }, + { + "epoch": 0.9858783159561831, + "grad_norm": 0.06277221441268921, + "learning_rate": 1.943063130222038e-06, + "loss": 0.157, + "num_input_tokens_seen": 3673024, + "step": 7470 + }, + { + "epoch": 0.9865382077339316, + "grad_norm": 0.04624286666512489, + "learning_rate": 1.9429097955995275e-06, + "loss": 0.0569, + "num_input_tokens_seen": 3675712, + "step": 7475 + }, + { + "epoch": 0.9871980995116801, + "grad_norm": 0.07568157464265823, + "learning_rate": 1.9427562608512406e-06, + "loss": 0.1377, + "num_input_tokens_seen": 3678080, + "step": 7480 + }, + { + "epoch": 0.9878579912894285, + "grad_norm": 28.12094497680664, + "learning_rate": 1.9426025260097645e-06, + "loss": 0.2043, + "num_input_tokens_seen": 3680448, + "step": 7485 + }, + { + "epoch": 0.988517883067177, + "grad_norm": 24.91111946105957, + "learning_rate": 1.9424485911077278e-06, + "loss": 0.0405, + "num_input_tokens_seen": 3682752, + "step": 7490 + }, + { + "epoch": 0.9891777748449254, + "grad_norm": 13.07931137084961, + "learning_rate": 1.9422944561778026e-06, + "loss": 0.1633, + "num_input_tokens_seen": 3685376, + "step": 7495 + }, + { + "epoch": 0.9898376666226739, + "grad_norm": 0.2622494697570801, + "learning_rate": 1.9421401212527023e-06, + "loss": 0.0496, + "num_input_tokens_seen": 3687744, + "step": 7500 + }, + { + "epoch": 0.9904975584004223, + "grad_norm": 33.244964599609375, + "learning_rate": 1.9419855863651837e-06, + "loss": 0.1828, + "num_input_tokens_seen": 3690240, + "step": 7505 + }, + { + "epoch": 0.9911574501781708, + "grad_norm": 0.11383026838302612, + "learning_rate": 1.941830851548046e-06, + "loss": 0.1345, + "num_input_tokens_seen": 3692736, + "step": 7510 + }, + { + "epoch": 0.9918173419559192, + "grad_norm": 10.269845008850098, + "learning_rate": 1.94167591683413e-06, + "loss": 0.1963, + "num_input_tokens_seen": 3695360, + "step": 7515 + }, + { + "epoch": 0.9924772337336677, + "grad_norm": 0.6313570737838745, + "learning_rate": 1.94152078225632e-06, + "loss": 0.0683, + "num_input_tokens_seen": 3697856, + "step": 7520 + }, + { + "epoch": 0.9931371255114161, + "grad_norm": 9.661954879760742, + "learning_rate": 1.9413654478475415e-06, + "loss": 0.0825, + "num_input_tokens_seen": 3700224, + "step": 7525 + }, + { + "epoch": 0.9937970172891646, + "grad_norm": 15.635941505432129, + "learning_rate": 1.941209913640764e-06, + "loss": 0.1488, + "num_input_tokens_seen": 3702592, + "step": 7530 + }, + { + "epoch": 0.994456909066913, + "grad_norm": 0.36205700039863586, + "learning_rate": 1.9410541796689975e-06, + "loss": 0.1055, + "num_input_tokens_seen": 3704896, + "step": 7535 + }, + { + "epoch": 0.9951168008446615, + "grad_norm": 1.9162219762802124, + "learning_rate": 1.9408982459652963e-06, + "loss": 0.0121, + "num_input_tokens_seen": 3707264, + "step": 7540 + }, + { + "epoch": 0.99577669262241, + "grad_norm": 13.23646354675293, + "learning_rate": 1.940742112562756e-06, + "loss": 0.0858, + "num_input_tokens_seen": 3709824, + "step": 7545 + }, + { + "epoch": 0.9964365844001584, + "grad_norm": 0.05299551039934158, + "learning_rate": 1.9405857794945142e-06, + "loss": 0.1293, + "num_input_tokens_seen": 3712192, + "step": 7550 + }, + { + "epoch": 0.9970964761779069, + "grad_norm": 118.45269775390625, + "learning_rate": 1.9404292467937525e-06, + "loss": 0.0699, + "num_input_tokens_seen": 3714880, + "step": 7555 + }, + { + "epoch": 0.9977563679556553, + "grad_norm": 4.233432769775391, + "learning_rate": 1.9402725144936926e-06, + "loss": 0.0584, + "num_input_tokens_seen": 3717184, + "step": 7560 + }, + { + "epoch": 0.9984162597334038, + "grad_norm": 0.11062260717153549, + "learning_rate": 1.940115582627601e-06, + "loss": 0.0402, + "num_input_tokens_seen": 3719424, + "step": 7565 + }, + { + "epoch": 0.9990761515111521, + "grad_norm": 0.0450112447142601, + "learning_rate": 1.9399584512287842e-06, + "loss": 0.0668, + "num_input_tokens_seen": 3721920, + "step": 7570 + }, + { + "epoch": 0.9997360432889006, + "grad_norm": 0.37227654457092285, + "learning_rate": 1.939801120330593e-06, + "loss": 0.123, + "num_input_tokens_seen": 3724288, + "step": 7575 + }, + { + "epoch": 1.0003959350666491, + "grad_norm": 0.26726233959198, + "learning_rate": 1.9396435899664198e-06, + "loss": 0.0006, + "num_input_tokens_seen": 3726464, + "step": 7580 + }, + { + "epoch": 1.0003959350666491, + "eval_loss": 0.11427787691354752, + "eval_runtime": 7.8131, + "eval_samples_per_second": 862.014, + "eval_steps_per_second": 107.768, + "num_input_tokens_seen": 3726464, + "step": 7580 + }, + { + "epoch": 1.0010558268443974, + "grad_norm": 0.03733626753091812, + "learning_rate": 1.9394858601696986e-06, + "loss": 0.0614, + "num_input_tokens_seen": 3728960, + "step": 7585 + }, + { + "epoch": 1.001715718622146, + "grad_norm": 4.338964462280273, + "learning_rate": 1.9393279309739067e-06, + "loss": 0.0011, + "num_input_tokens_seen": 3731648, + "step": 7590 + }, + { + "epoch": 1.0023756103998944, + "grad_norm": 0.05182173103094101, + "learning_rate": 1.939169802412564e-06, + "loss": 0.0326, + "num_input_tokens_seen": 3734144, + "step": 7595 + }, + { + "epoch": 1.003035502177643, + "grad_norm": 0.3849523663520813, + "learning_rate": 1.939011474519231e-06, + "loss": 0.1584, + "num_input_tokens_seen": 3736704, + "step": 7600 + }, + { + "epoch": 1.0036953939553914, + "grad_norm": 0.08171097934246063, + "learning_rate": 1.938852947327513e-06, + "loss": 0.1347, + "num_input_tokens_seen": 3739328, + "step": 7605 + }, + { + "epoch": 1.0043552857331397, + "grad_norm": 14.762300491333008, + "learning_rate": 1.938694220871055e-06, + "loss": 0.1089, + "num_input_tokens_seen": 3741760, + "step": 7610 + }, + { + "epoch": 1.0050151775108882, + "grad_norm": 0.04276171326637268, + "learning_rate": 1.938535295183547e-06, + "loss": 0.0551, + "num_input_tokens_seen": 3744384, + "step": 7615 + }, + { + "epoch": 1.0056750692886367, + "grad_norm": 0.042462147772312164, + "learning_rate": 1.938376170298718e-06, + "loss": 0.1567, + "num_input_tokens_seen": 3746816, + "step": 7620 + }, + { + "epoch": 1.0063349610663852, + "grad_norm": 0.3363673686981201, + "learning_rate": 1.9382168462503425e-06, + "loss": 0.0361, + "num_input_tokens_seen": 3749504, + "step": 7625 + }, + { + "epoch": 1.0069948528441335, + "grad_norm": 0.15293100476264954, + "learning_rate": 1.9380573230722354e-06, + "loss": 0.052, + "num_input_tokens_seen": 3751936, + "step": 7630 + }, + { + "epoch": 1.007654744621882, + "grad_norm": 0.10599560290575027, + "learning_rate": 1.9378976007982543e-06, + "loss": 0.0017, + "num_input_tokens_seen": 3754368, + "step": 7635 + }, + { + "epoch": 1.0083146363996305, + "grad_norm": 29.61864471435547, + "learning_rate": 1.9377376794622992e-06, + "loss": 0.0029, + "num_input_tokens_seen": 3756736, + "step": 7640 + }, + { + "epoch": 1.008974528177379, + "grad_norm": 2.77350115776062, + "learning_rate": 1.937577559098312e-06, + "loss": 0.0945, + "num_input_tokens_seen": 3759360, + "step": 7645 + }, + { + "epoch": 1.0096344199551273, + "grad_norm": 0.023661043494939804, + "learning_rate": 1.9374172397402774e-06, + "loss": 0.0614, + "num_input_tokens_seen": 3761536, + "step": 7650 + }, + { + "epoch": 1.0102943117328758, + "grad_norm": 21.96059799194336, + "learning_rate": 1.937256721422222e-06, + "loss": 0.1333, + "num_input_tokens_seen": 3763968, + "step": 7655 + }, + { + "epoch": 1.0109542035106243, + "grad_norm": 16.02443504333496, + "learning_rate": 1.9370960041782144e-06, + "loss": 0.1473, + "num_input_tokens_seen": 3766336, + "step": 7660 + }, + { + "epoch": 1.0116140952883728, + "grad_norm": 0.16665181517601013, + "learning_rate": 1.936935088042366e-06, + "loss": 0.1232, + "num_input_tokens_seen": 3768832, + "step": 7665 + }, + { + "epoch": 1.0122739870661213, + "grad_norm": 0.06876052170991898, + "learning_rate": 1.9367739730488295e-06, + "loss": 0.066, + "num_input_tokens_seen": 3771264, + "step": 7670 + }, + { + "epoch": 1.0129338788438695, + "grad_norm": 0.15175847709178925, + "learning_rate": 1.9366126592318012e-06, + "loss": 0.0628, + "num_input_tokens_seen": 3773760, + "step": 7675 + }, + { + "epoch": 1.013593770621618, + "grad_norm": 1.0382298231124878, + "learning_rate": 1.936451146625518e-06, + "loss": 0.069, + "num_input_tokens_seen": 3776064, + "step": 7680 + }, + { + "epoch": 1.0142536623993665, + "grad_norm": 0.12675048410892487, + "learning_rate": 1.9362894352642606e-06, + "loss": 0.0009, + "num_input_tokens_seen": 3778496, + "step": 7685 + }, + { + "epoch": 1.014913554177115, + "grad_norm": 0.25615447759628296, + "learning_rate": 1.9361275251823507e-06, + "loss": 0.0102, + "num_input_tokens_seen": 3780928, + "step": 7690 + }, + { + "epoch": 1.0155734459548633, + "grad_norm": 0.049605101346969604, + "learning_rate": 1.935965416414152e-06, + "loss": 0.1457, + "num_input_tokens_seen": 3783360, + "step": 7695 + }, + { + "epoch": 1.0162333377326118, + "grad_norm": 0.04638204351067543, + "learning_rate": 1.935803108994072e-06, + "loss": 0.0004, + "num_input_tokens_seen": 3785664, + "step": 7700 + }, + { + "epoch": 1.0168932295103603, + "grad_norm": 0.0021269829012453556, + "learning_rate": 1.9356406029565584e-06, + "loss": 0.0002, + "num_input_tokens_seen": 3788288, + "step": 7705 + }, + { + "epoch": 1.0175531212881088, + "grad_norm": 63.27873229980469, + "learning_rate": 1.935477898336102e-06, + "loss": 0.2777, + "num_input_tokens_seen": 3790784, + "step": 7710 + }, + { + "epoch": 1.018213013065857, + "grad_norm": 0.013677487149834633, + "learning_rate": 1.935314995167236e-06, + "loss": 0.0015, + "num_input_tokens_seen": 3793152, + "step": 7715 + }, + { + "epoch": 1.0188729048436056, + "grad_norm": 0.01026434451341629, + "learning_rate": 1.9351518934845355e-06, + "loss": 0.0003, + "num_input_tokens_seen": 3795712, + "step": 7720 + }, + { + "epoch": 1.019532796621354, + "grad_norm": 18.70940589904785, + "learning_rate": 1.934988593322617e-06, + "loss": 0.0478, + "num_input_tokens_seen": 3798080, + "step": 7725 + }, + { + "epoch": 1.0201926883991026, + "grad_norm": 17.24424934387207, + "learning_rate": 1.934825094716141e-06, + "loss": 0.1037, + "num_input_tokens_seen": 3800640, + "step": 7730 + }, + { + "epoch": 1.020852580176851, + "grad_norm": 26.107290267944336, + "learning_rate": 1.9346613976998075e-06, + "loss": 0.1827, + "num_input_tokens_seen": 3802944, + "step": 7735 + }, + { + "epoch": 1.0215124719545994, + "grad_norm": 16.36334991455078, + "learning_rate": 1.9344975023083606e-06, + "loss": 0.0798, + "num_input_tokens_seen": 3805504, + "step": 7740 + }, + { + "epoch": 1.0221723637323479, + "grad_norm": 0.3354093134403229, + "learning_rate": 1.9343334085765862e-06, + "loss": 0.1328, + "num_input_tokens_seen": 3807744, + "step": 7745 + }, + { + "epoch": 1.0228322555100964, + "grad_norm": 0.12368268519639969, + "learning_rate": 1.9341691165393116e-06, + "loss": 0.028, + "num_input_tokens_seen": 3810112, + "step": 7750 + }, + { + "epoch": 1.0234921472878449, + "grad_norm": 0.2610388994216919, + "learning_rate": 1.9340046262314065e-06, + "loss": 0.1645, + "num_input_tokens_seen": 3812608, + "step": 7755 + }, + { + "epoch": 1.0241520390655932, + "grad_norm": 0.12910595536231995, + "learning_rate": 1.9338399376877835e-06, + "loss": 0.0013, + "num_input_tokens_seen": 3814912, + "step": 7760 + }, + { + "epoch": 1.0248119308433417, + "grad_norm": 0.11450402438640594, + "learning_rate": 1.9336750509433958e-06, + "loss": 0.0006, + "num_input_tokens_seen": 3817600, + "step": 7765 + }, + { + "epoch": 1.0254718226210902, + "grad_norm": 0.05036715790629387, + "learning_rate": 1.93350996603324e-06, + "loss": 0.1336, + "num_input_tokens_seen": 3819904, + "step": 7770 + }, + { + "epoch": 1.0261317143988387, + "grad_norm": 4.654677391052246, + "learning_rate": 1.933344682992353e-06, + "loss": 0.0452, + "num_input_tokens_seen": 3822272, + "step": 7775 + }, + { + "epoch": 1.026791606176587, + "grad_norm": 117.1086654663086, + "learning_rate": 1.9331792018558165e-06, + "loss": 0.1192, + "num_input_tokens_seen": 3824512, + "step": 7780 + }, + { + "epoch": 1.0274514979543354, + "grad_norm": 0.021433783695101738, + "learning_rate": 1.933013522658752e-06, + "loss": 0.0616, + "num_input_tokens_seen": 3827072, + "step": 7785 + }, + { + "epoch": 1.028111389732084, + "grad_norm": 0.1878979206085205, + "learning_rate": 1.9328476454363235e-06, + "loss": 0.1669, + "num_input_tokens_seen": 3829632, + "step": 7790 + }, + { + "epoch": 1.0287712815098324, + "grad_norm": 0.5877869725227356, + "learning_rate": 1.932681570223737e-06, + "loss": 0.0014, + "num_input_tokens_seen": 3832000, + "step": 7795 + }, + { + "epoch": 1.029431173287581, + "grad_norm": 0.03438083082437515, + "learning_rate": 1.9325152970562418e-06, + "loss": 0.0993, + "num_input_tokens_seen": 3834624, + "step": 7800 + }, + { + "epoch": 1.0300910650653292, + "grad_norm": 50.06906509399414, + "learning_rate": 1.9323488259691273e-06, + "loss": 0.1106, + "num_input_tokens_seen": 3836992, + "step": 7805 + }, + { + "epoch": 1.0307509568430777, + "grad_norm": 0.03151680901646614, + "learning_rate": 1.932182156997726e-06, + "loss": 0.1535, + "num_input_tokens_seen": 3839488, + "step": 7810 + }, + { + "epoch": 1.0314108486208262, + "grad_norm": 4.190992832183838, + "learning_rate": 1.9320152901774124e-06, + "loss": 0.0557, + "num_input_tokens_seen": 3842112, + "step": 7815 + }, + { + "epoch": 1.0320707403985747, + "grad_norm": 0.07871054112911224, + "learning_rate": 1.9318482255436022e-06, + "loss": 0.0745, + "num_input_tokens_seen": 3844288, + "step": 7820 + }, + { + "epoch": 1.032730632176323, + "grad_norm": 0.023968158289790154, + "learning_rate": 1.9316809631317544e-06, + "loss": 0.1813, + "num_input_tokens_seen": 3846656, + "step": 7825 + }, + { + "epoch": 1.0333905239540715, + "grad_norm": 0.8618749380111694, + "learning_rate": 1.931513502977369e-06, + "loss": 0.0014, + "num_input_tokens_seen": 3849344, + "step": 7830 + }, + { + "epoch": 1.03405041573182, + "grad_norm": 0.11035836488008499, + "learning_rate": 1.931345845115988e-06, + "loss": 0.0637, + "num_input_tokens_seen": 3851712, + "step": 7835 + }, + { + "epoch": 1.0347103075095685, + "grad_norm": 163.74595642089844, + "learning_rate": 1.931177989583195e-06, + "loss": 0.0103, + "num_input_tokens_seen": 3854080, + "step": 7840 + }, + { + "epoch": 1.0353701992873168, + "grad_norm": 52.531402587890625, + "learning_rate": 1.9310099364146174e-06, + "loss": 0.063, + "num_input_tokens_seen": 3856576, + "step": 7845 + }, + { + "epoch": 1.0360300910650653, + "grad_norm": 28.098182678222656, + "learning_rate": 1.930841685645922e-06, + "loss": 0.1425, + "num_input_tokens_seen": 3859008, + "step": 7850 + }, + { + "epoch": 1.0366899828428138, + "grad_norm": 0.5196312069892883, + "learning_rate": 1.93067323731282e-06, + "loss": 0.0006, + "num_input_tokens_seen": 3861376, + "step": 7855 + }, + { + "epoch": 1.0373498746205623, + "grad_norm": 0.149097740650177, + "learning_rate": 1.930504591451063e-06, + "loss": 0.0849, + "num_input_tokens_seen": 3863872, + "step": 7860 + }, + { + "epoch": 1.0380097663983108, + "grad_norm": 0.03719232231378555, + "learning_rate": 1.9303357480964445e-06, + "loss": 0.073, + "num_input_tokens_seen": 3866304, + "step": 7865 + }, + { + "epoch": 1.038669658176059, + "grad_norm": 0.17920592427253723, + "learning_rate": 1.9301667072848002e-06, + "loss": 0.0006, + "num_input_tokens_seen": 3868864, + "step": 7870 + }, + { + "epoch": 1.0393295499538076, + "grad_norm": 0.6391183137893677, + "learning_rate": 1.929997469052008e-06, + "loss": 0.0896, + "num_input_tokens_seen": 3870976, + "step": 7875 + }, + { + "epoch": 1.039989441731556, + "grad_norm": 0.045991070568561554, + "learning_rate": 1.929828033433988e-06, + "loss": 0.132, + "num_input_tokens_seen": 3873408, + "step": 7880 + }, + { + "epoch": 1.0406493335093046, + "grad_norm": 19.03218650817871, + "learning_rate": 1.9296584004667005e-06, + "loss": 0.2669, + "num_input_tokens_seen": 3875776, + "step": 7885 + }, + { + "epoch": 1.0413092252870528, + "grad_norm": 0.6183151602745056, + "learning_rate": 1.92948857018615e-06, + "loss": 0.0612, + "num_input_tokens_seen": 3878144, + "step": 7890 + }, + { + "epoch": 1.0419691170648013, + "grad_norm": 0.5006767511367798, + "learning_rate": 1.929318542628381e-06, + "loss": 0.1703, + "num_input_tokens_seen": 3880512, + "step": 7895 + }, + { + "epoch": 1.0426290088425498, + "grad_norm": 0.08102352917194366, + "learning_rate": 1.9291483178294813e-06, + "loss": 0.0021, + "num_input_tokens_seen": 3882880, + "step": 7900 + }, + { + "epoch": 1.0432889006202983, + "grad_norm": 0.03578070178627968, + "learning_rate": 1.928977895825579e-06, + "loss": 0.0301, + "num_input_tokens_seen": 3885312, + "step": 7905 + }, + { + "epoch": 1.0439487923980466, + "grad_norm": 0.05146459862589836, + "learning_rate": 1.928807276652846e-06, + "loss": 0.0624, + "num_input_tokens_seen": 3887744, + "step": 7910 + }, + { + "epoch": 1.044608684175795, + "grad_norm": 0.05097321793437004, + "learning_rate": 1.928636460347494e-06, + "loss": 0.0541, + "num_input_tokens_seen": 3890048, + "step": 7915 + }, + { + "epoch": 1.0452685759535436, + "grad_norm": 101.96385955810547, + "learning_rate": 1.928465446945778e-06, + "loss": 0.0159, + "num_input_tokens_seen": 3892480, + "step": 7920 + }, + { + "epoch": 1.045928467731292, + "grad_norm": 0.19685277342796326, + "learning_rate": 1.9282942364839947e-06, + "loss": 0.0013, + "num_input_tokens_seen": 3894784, + "step": 7925 + }, + { + "epoch": 1.0465883595090406, + "grad_norm": 0.08247081935405731, + "learning_rate": 1.9281228289984816e-06, + "loss": 0.0775, + "num_input_tokens_seen": 3897472, + "step": 7930 + }, + { + "epoch": 1.047248251286789, + "grad_norm": 25.526233673095703, + "learning_rate": 1.927951224525619e-06, + "loss": 0.078, + "num_input_tokens_seen": 3900032, + "step": 7935 + }, + { + "epoch": 1.0479081430645374, + "grad_norm": 0.10590098053216934, + "learning_rate": 1.9277794231018286e-06, + "loss": 0.0335, + "num_input_tokens_seen": 3902592, + "step": 7940 + }, + { + "epoch": 1.048568034842286, + "grad_norm": 13.906495094299316, + "learning_rate": 1.927607424763574e-06, + "loss": 0.0915, + "num_input_tokens_seen": 3904896, + "step": 7945 + }, + { + "epoch": 1.0492279266200344, + "grad_norm": 0.04824037477374077, + "learning_rate": 1.927435229547361e-06, + "loss": 0.0017, + "num_input_tokens_seen": 3907200, + "step": 7950 + }, + { + "epoch": 1.0498878183977827, + "grad_norm": 0.19578570127487183, + "learning_rate": 1.9272628374897366e-06, + "loss": 0.0768, + "num_input_tokens_seen": 3909888, + "step": 7955 + }, + { + "epoch": 1.0505477101755312, + "grad_norm": 0.061658911406993866, + "learning_rate": 1.9270902486272892e-06, + "loss": 0.0445, + "num_input_tokens_seen": 3912320, + "step": 7960 + }, + { + "epoch": 1.0512076019532797, + "grad_norm": 0.052046775817871094, + "learning_rate": 1.92691746299665e-06, + "loss": 0.1083, + "num_input_tokens_seen": 3914944, + "step": 7965 + }, + { + "epoch": 1.0518674937310282, + "grad_norm": 0.020027488470077515, + "learning_rate": 1.9267444806344917e-06, + "loss": 0.0833, + "num_input_tokens_seen": 3917376, + "step": 7970 + }, + { + "epoch": 1.0525273855087764, + "grad_norm": 0.0871131494641304, + "learning_rate": 1.9265713015775285e-06, + "loss": 0.0865, + "num_input_tokens_seen": 3919872, + "step": 7975 + }, + { + "epoch": 1.053187277286525, + "grad_norm": 0.11457386612892151, + "learning_rate": 1.926397925862516e-06, + "loss": 0.1148, + "num_input_tokens_seen": 3922368, + "step": 7980 + }, + { + "epoch": 1.0538471690642734, + "grad_norm": 464.58880615234375, + "learning_rate": 1.9262243535262527e-06, + "loss": 0.0591, + "num_input_tokens_seen": 3925056, + "step": 7985 + }, + { + "epoch": 1.054507060842022, + "grad_norm": 91.03417205810547, + "learning_rate": 1.926050584605577e-06, + "loss": 0.2027, + "num_input_tokens_seen": 3927552, + "step": 7990 + }, + { + "epoch": 1.0551669526197704, + "grad_norm": 0.5855996012687683, + "learning_rate": 1.9258766191373706e-06, + "loss": 0.0009, + "num_input_tokens_seen": 3930176, + "step": 7995 + }, + { + "epoch": 1.0558268443975187, + "grad_norm": 0.06435656547546387, + "learning_rate": 1.9257024571585565e-06, + "loss": 0.0005, + "num_input_tokens_seen": 3932672, + "step": 8000 + }, + { + "epoch": 1.0564867361752672, + "grad_norm": 0.12676815688610077, + "learning_rate": 1.9255280987060995e-06, + "loss": 0.2338, + "num_input_tokens_seen": 3934912, + "step": 8005 + }, + { + "epoch": 1.0571466279530157, + "grad_norm": 0.19433751702308655, + "learning_rate": 1.9253535438170056e-06, + "loss": 0.0681, + "num_input_tokens_seen": 3937216, + "step": 8010 + }, + { + "epoch": 1.0578065197307642, + "grad_norm": 18.061725616455078, + "learning_rate": 1.9251787925283228e-06, + "loss": 0.145, + "num_input_tokens_seen": 3939776, + "step": 8015 + }, + { + "epoch": 1.0584664115085125, + "grad_norm": 0.5926772952079773, + "learning_rate": 1.925003844877141e-06, + "loss": 0.1157, + "num_input_tokens_seen": 3941888, + "step": 8020 + }, + { + "epoch": 1.059126303286261, + "grad_norm": 0.10081900656223297, + "learning_rate": 1.9248287009005914e-06, + "loss": 0.0852, + "num_input_tokens_seen": 3944192, + "step": 8025 + }, + { + "epoch": 1.0597861950640095, + "grad_norm": 0.25350770354270935, + "learning_rate": 1.9246533606358475e-06, + "loss": 0.0706, + "num_input_tokens_seen": 3946816, + "step": 8030 + }, + { + "epoch": 1.060446086841758, + "grad_norm": 57.27534484863281, + "learning_rate": 1.9244778241201232e-06, + "loss": 0.1036, + "num_input_tokens_seen": 3949440, + "step": 8035 + }, + { + "epoch": 1.0611059786195065, + "grad_norm": 0.20645537972450256, + "learning_rate": 1.9243020913906753e-06, + "loss": 0.0058, + "num_input_tokens_seen": 3952000, + "step": 8040 + }, + { + "epoch": 1.0617658703972548, + "grad_norm": 0.2704315483570099, + "learning_rate": 1.924126162484802e-06, + "loss": 0.0008, + "num_input_tokens_seen": 3954240, + "step": 8045 + }, + { + "epoch": 1.0624257621750033, + "grad_norm": 0.09804032742977142, + "learning_rate": 1.9239500374398427e-06, + "loss": 0.1045, + "num_input_tokens_seen": 3956608, + "step": 8050 + }, + { + "epoch": 1.0630856539527518, + "grad_norm": 0.4295539855957031, + "learning_rate": 1.9237737162931785e-06, + "loss": 0.0012, + "num_input_tokens_seen": 3959104, + "step": 8055 + }, + { + "epoch": 1.0637455457305003, + "grad_norm": 0.13368825614452362, + "learning_rate": 1.9235971990822323e-06, + "loss": 0.1183, + "num_input_tokens_seen": 3961664, + "step": 8060 + }, + { + "epoch": 1.0644054375082486, + "grad_norm": 38.22637176513672, + "learning_rate": 1.923420485844469e-06, + "loss": 0.0608, + "num_input_tokens_seen": 3964352, + "step": 8065 + }, + { + "epoch": 1.065065329285997, + "grad_norm": 6.1040754318237305, + "learning_rate": 1.9232435766173944e-06, + "loss": 0.0015, + "num_input_tokens_seen": 3966656, + "step": 8070 + }, + { + "epoch": 1.0657252210637456, + "grad_norm": 0.0031956711318343878, + "learning_rate": 1.9230664714385567e-06, + "loss": 0.1006, + "num_input_tokens_seen": 3968896, + "step": 8075 + }, + { + "epoch": 1.066385112841494, + "grad_norm": 0.07407024502754211, + "learning_rate": 1.922889170345544e-06, + "loss": 0.028, + "num_input_tokens_seen": 3971328, + "step": 8080 + }, + { + "epoch": 1.0670450046192423, + "grad_norm": 0.1356084942817688, + "learning_rate": 1.9227116733759883e-06, + "loss": 0.1022, + "num_input_tokens_seen": 3973696, + "step": 8085 + }, + { + "epoch": 1.0677048963969908, + "grad_norm": 56.98714828491211, + "learning_rate": 1.922533980567562e-06, + "loss": 0.1266, + "num_input_tokens_seen": 3976192, + "step": 8090 + }, + { + "epoch": 1.0683647881747393, + "grad_norm": 0.14554879069328308, + "learning_rate": 1.9223560919579782e-06, + "loss": 0.0802, + "num_input_tokens_seen": 3978944, + "step": 8095 + }, + { + "epoch": 1.0690246799524878, + "grad_norm": 0.040467824786901474, + "learning_rate": 1.922178007584993e-06, + "loss": 0.0701, + "num_input_tokens_seen": 3981376, + "step": 8100 + }, + { + "epoch": 1.0696845717302363, + "grad_norm": 0.20177550613880157, + "learning_rate": 1.921999727486404e-06, + "loss": 0.0006, + "num_input_tokens_seen": 3983744, + "step": 8105 + }, + { + "epoch": 1.0703444635079846, + "grad_norm": 0.04099415987730026, + "learning_rate": 1.9218212517000495e-06, + "loss": 0.0604, + "num_input_tokens_seen": 3986048, + "step": 8110 + }, + { + "epoch": 1.0710043552857331, + "grad_norm": 1.1667630672454834, + "learning_rate": 1.9216425802638095e-06, + "loss": 0.0162, + "num_input_tokens_seen": 3988736, + "step": 8115 + }, + { + "epoch": 1.0716642470634816, + "grad_norm": 0.041468545794487, + "learning_rate": 1.9214637132156056e-06, + "loss": 0.046, + "num_input_tokens_seen": 3991360, + "step": 8120 + }, + { + "epoch": 1.0723241388412301, + "grad_norm": 0.10066083073616028, + "learning_rate": 1.9212846505934018e-06, + "loss": 0.0479, + "num_input_tokens_seen": 3994176, + "step": 8125 + }, + { + "epoch": 1.0729840306189784, + "grad_norm": 39.72118377685547, + "learning_rate": 1.921105392435202e-06, + "loss": 0.0754, + "num_input_tokens_seen": 3996416, + "step": 8130 + }, + { + "epoch": 1.073643922396727, + "grad_norm": 0.09259176254272461, + "learning_rate": 1.9209259387790526e-06, + "loss": 0.0023, + "num_input_tokens_seen": 3998976, + "step": 8135 + }, + { + "epoch": 1.0743038141744754, + "grad_norm": 0.008366269990801811, + "learning_rate": 1.920746289663042e-06, + "loss": 0.1178, + "num_input_tokens_seen": 4001344, + "step": 8140 + }, + { + "epoch": 1.074963705952224, + "grad_norm": 0.046617474406957626, + "learning_rate": 1.9205664451252986e-06, + "loss": 0.0384, + "num_input_tokens_seen": 4003712, + "step": 8145 + }, + { + "epoch": 1.0756235977299722, + "grad_norm": 0.5906067490577698, + "learning_rate": 1.9203864052039935e-06, + "loss": 0.0743, + "num_input_tokens_seen": 4006144, + "step": 8150 + }, + { + "epoch": 1.0762834895077207, + "grad_norm": 0.007874682545661926, + "learning_rate": 1.9202061699373386e-06, + "loss": 0.0612, + "num_input_tokens_seen": 4008640, + "step": 8155 + }, + { + "epoch": 1.0769433812854692, + "grad_norm": 0.027073025703430176, + "learning_rate": 1.9200257393635878e-06, + "loss": 0.0445, + "num_input_tokens_seen": 4011456, + "step": 8160 + }, + { + "epoch": 1.0776032730632177, + "grad_norm": 0.06536184996366501, + "learning_rate": 1.9198451135210365e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4014144, + "step": 8165 + }, + { + "epoch": 1.0782631648409662, + "grad_norm": 125.64025115966797, + "learning_rate": 1.919664292448021e-06, + "loss": 0.0969, + "num_input_tokens_seen": 4016320, + "step": 8170 + }, + { + "epoch": 1.0789230566187145, + "grad_norm": 0.006089900620281696, + "learning_rate": 1.9194832761829184e-06, + "loss": 0.0018, + "num_input_tokens_seen": 4019008, + "step": 8175 + }, + { + "epoch": 1.079582948396463, + "grad_norm": 27.950912475585938, + "learning_rate": 1.919302064764149e-06, + "loss": 0.1604, + "num_input_tokens_seen": 4021568, + "step": 8180 + }, + { + "epoch": 1.0802428401742115, + "grad_norm": 0.3938080370426178, + "learning_rate": 1.9191206582301737e-06, + "loss": 0.0614, + "num_input_tokens_seen": 4024064, + "step": 8185 + }, + { + "epoch": 1.08090273195196, + "grad_norm": 0.023270845413208008, + "learning_rate": 1.9189390566194942e-06, + "loss": 0.2029, + "num_input_tokens_seen": 4026496, + "step": 8190 + }, + { + "epoch": 1.0815626237297082, + "grad_norm": 0.251174658536911, + "learning_rate": 1.9187572599706547e-06, + "loss": 0.0022, + "num_input_tokens_seen": 4028992, + "step": 8195 + }, + { + "epoch": 1.0822225155074567, + "grad_norm": 0.6823098063468933, + "learning_rate": 1.9185752683222395e-06, + "loss": 0.2369, + "num_input_tokens_seen": 4031296, + "step": 8200 + }, + { + "epoch": 1.0828824072852052, + "grad_norm": 43.15085983276367, + "learning_rate": 1.9183930817128755e-06, + "loss": 0.0804, + "num_input_tokens_seen": 4033664, + "step": 8205 + }, + { + "epoch": 1.0835422990629537, + "grad_norm": 28.672992706298828, + "learning_rate": 1.9182107001812303e-06, + "loss": 0.0454, + "num_input_tokens_seen": 4036160, + "step": 8210 + }, + { + "epoch": 1.0842021908407022, + "grad_norm": 59.211666107177734, + "learning_rate": 1.9180281237660136e-06, + "loss": 0.0698, + "num_input_tokens_seen": 4038784, + "step": 8215 + }, + { + "epoch": 1.0848620826184505, + "grad_norm": 0.054303571581840515, + "learning_rate": 1.917845352505975e-06, + "loss": 0.0012, + "num_input_tokens_seen": 4041280, + "step": 8220 + }, + { + "epoch": 1.085521974396199, + "grad_norm": 0.20329880714416504, + "learning_rate": 1.917662386439907e-06, + "loss": 0.0258, + "num_input_tokens_seen": 4043712, + "step": 8225 + }, + { + "epoch": 1.0861818661739475, + "grad_norm": 1.4719992876052856, + "learning_rate": 1.9174792256066427e-06, + "loss": 0.0015, + "num_input_tokens_seen": 4045824, + "step": 8230 + }, + { + "epoch": 1.086841757951696, + "grad_norm": 0.0649583712220192, + "learning_rate": 1.9172958700450565e-06, + "loss": 0.0688, + "num_input_tokens_seen": 4048320, + "step": 8235 + }, + { + "epoch": 1.0875016497294443, + "grad_norm": 1.4787904024124146, + "learning_rate": 1.9171123197940647e-06, + "loss": 0.0083, + "num_input_tokens_seen": 4050688, + "step": 8240 + }, + { + "epoch": 1.0881615415071928, + "grad_norm": 0.20488137006759644, + "learning_rate": 1.916928574892624e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4053312, + "step": 8245 + }, + { + "epoch": 1.0888214332849413, + "grad_norm": 0.019268441945314407, + "learning_rate": 1.9167446353797334e-06, + "loss": 0.0955, + "num_input_tokens_seen": 4055872, + "step": 8250 + }, + { + "epoch": 1.0894813250626898, + "grad_norm": 0.0886877030134201, + "learning_rate": 1.9165605012944322e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4058304, + "step": 8255 + }, + { + "epoch": 1.090141216840438, + "grad_norm": 16.01343536376953, + "learning_rate": 1.916376172675802e-06, + "loss": 0.1845, + "num_input_tokens_seen": 4060800, + "step": 8260 + }, + { + "epoch": 1.0908011086181866, + "grad_norm": 0.229745015501976, + "learning_rate": 1.916191649562965e-06, + "loss": 0.1351, + "num_input_tokens_seen": 4063488, + "step": 8265 + }, + { + "epoch": 1.091461000395935, + "grad_norm": 0.015055251307785511, + "learning_rate": 1.9160069319950844e-06, + "loss": 0.0002, + "num_input_tokens_seen": 4066176, + "step": 8270 + }, + { + "epoch": 1.0921208921736836, + "grad_norm": 0.02369670942425728, + "learning_rate": 1.915822020011366e-06, + "loss": 0.0815, + "num_input_tokens_seen": 4068864, + "step": 8275 + }, + { + "epoch": 1.092780783951432, + "grad_norm": 0.34016939997673035, + "learning_rate": 1.915636913651056e-06, + "loss": 0.0007, + "num_input_tokens_seen": 4071360, + "step": 8280 + }, + { + "epoch": 1.0934406757291804, + "grad_norm": 0.006058728788048029, + "learning_rate": 1.9154516129534414e-06, + "loss": 0.0924, + "num_input_tokens_seen": 4074112, + "step": 8285 + }, + { + "epoch": 1.0941005675069289, + "grad_norm": 0.054655253887176514, + "learning_rate": 1.915266117957851e-06, + "loss": 0.0658, + "num_input_tokens_seen": 4076480, + "step": 8290 + }, + { + "epoch": 1.0947604592846774, + "grad_norm": 0.012565754354000092, + "learning_rate": 1.915080428703655e-06, + "loss": 0.0017, + "num_input_tokens_seen": 4078976, + "step": 8295 + }, + { + "epoch": 1.0954203510624259, + "grad_norm": 0.027407482266426086, + "learning_rate": 1.9148945452302647e-06, + "loss": 0.1423, + "num_input_tokens_seen": 4081664, + "step": 8300 + }, + { + "epoch": 1.0960802428401741, + "grad_norm": 0.10399171710014343, + "learning_rate": 1.9147084675771322e-06, + "loss": 0.1065, + "num_input_tokens_seen": 4084288, + "step": 8305 + }, + { + "epoch": 1.0967401346179226, + "grad_norm": 0.014594976790249348, + "learning_rate": 1.9145221957837513e-06, + "loss": 0.0763, + "num_input_tokens_seen": 4086656, + "step": 8310 + }, + { + "epoch": 1.0974000263956711, + "grad_norm": 0.027905648574233055, + "learning_rate": 1.9143357298896564e-06, + "loss": 0.066, + "num_input_tokens_seen": 4089152, + "step": 8315 + }, + { + "epoch": 1.0980599181734196, + "grad_norm": 0.026191938668489456, + "learning_rate": 1.9141490699344243e-06, + "loss": 0.0539, + "num_input_tokens_seen": 4091584, + "step": 8320 + }, + { + "epoch": 1.098719809951168, + "grad_norm": 31.117815017700195, + "learning_rate": 1.913962215957672e-06, + "loss": 0.0029, + "num_input_tokens_seen": 4093888, + "step": 8325 + }, + { + "epoch": 1.0993797017289164, + "grad_norm": 20.938081741333008, + "learning_rate": 1.9137751679990576e-06, + "loss": 0.0022, + "num_input_tokens_seen": 4096256, + "step": 8330 + }, + { + "epoch": 1.100039593506665, + "grad_norm": 15.29064655303955, + "learning_rate": 1.9135879260982806e-06, + "loss": 0.1069, + "num_input_tokens_seen": 4098816, + "step": 8335 + }, + { + "epoch": 1.1006994852844134, + "grad_norm": 0.03777789697051048, + "learning_rate": 1.9134004902950826e-06, + "loss": 0.1271, + "num_input_tokens_seen": 4101184, + "step": 8340 + }, + { + "epoch": 1.101359377062162, + "grad_norm": 0.26297852396965027, + "learning_rate": 1.913212860629244e-06, + "loss": 0.1017, + "num_input_tokens_seen": 4103488, + "step": 8345 + }, + { + "epoch": 1.1020192688399102, + "grad_norm": 1.434247374534607, + "learning_rate": 1.9130250371405895e-06, + "loss": 0.1092, + "num_input_tokens_seen": 4105792, + "step": 8350 + }, + { + "epoch": 1.1026791606176587, + "grad_norm": 0.41510826349258423, + "learning_rate": 1.912837019868982e-06, + "loss": 0.0289, + "num_input_tokens_seen": 4108416, + "step": 8355 + }, + { + "epoch": 1.1033390523954072, + "grad_norm": 0.07832839339971542, + "learning_rate": 1.9126488088543273e-06, + "loss": 0.0011, + "num_input_tokens_seen": 4110976, + "step": 8360 + }, + { + "epoch": 1.1039989441731557, + "grad_norm": 24.100696563720703, + "learning_rate": 1.912460404136572e-06, + "loss": 0.1147, + "num_input_tokens_seen": 4113536, + "step": 8365 + }, + { + "epoch": 1.104658835950904, + "grad_norm": 137.2346954345703, + "learning_rate": 1.912271805755703e-06, + "loss": 0.1414, + "num_input_tokens_seen": 4115840, + "step": 8370 + }, + { + "epoch": 1.1053187277286525, + "grad_norm": 0.1459578573703766, + "learning_rate": 1.9120830137517498e-06, + "loss": 0.0527, + "num_input_tokens_seen": 4118208, + "step": 8375 + }, + { + "epoch": 1.105978619506401, + "grad_norm": 0.1875588446855545, + "learning_rate": 1.9118940281647816e-06, + "loss": 0.0102, + "num_input_tokens_seen": 4120448, + "step": 8380 + }, + { + "epoch": 1.1066385112841495, + "grad_norm": 65.89623260498047, + "learning_rate": 1.9117048490349096e-06, + "loss": 0.1928, + "num_input_tokens_seen": 4122560, + "step": 8385 + }, + { + "epoch": 1.1072984030618978, + "grad_norm": 0.024722224101424217, + "learning_rate": 1.9115154764022852e-06, + "loss": 0.0306, + "num_input_tokens_seen": 4125120, + "step": 8390 + }, + { + "epoch": 1.1079582948396463, + "grad_norm": 0.13537687063217163, + "learning_rate": 1.9113259103071015e-06, + "loss": 0.0867, + "num_input_tokens_seen": 4127360, + "step": 8395 + }, + { + "epoch": 1.1086181866173948, + "grad_norm": 0.12146171927452087, + "learning_rate": 1.9111361507895925e-06, + "loss": 0.1242, + "num_input_tokens_seen": 4129664, + "step": 8400 + }, + { + "epoch": 1.1092780783951433, + "grad_norm": 139.49085998535156, + "learning_rate": 1.9109461978900342e-06, + "loss": 0.082, + "num_input_tokens_seen": 4132032, + "step": 8405 + }, + { + "epoch": 1.1099379701728918, + "grad_norm": 0.11550889164209366, + "learning_rate": 1.910756051648741e-06, + "loss": 0.0745, + "num_input_tokens_seen": 4134528, + "step": 8410 + }, + { + "epoch": 1.11059786195064, + "grad_norm": 0.04432675242424011, + "learning_rate": 1.9105657121060715e-06, + "loss": 0.001, + "num_input_tokens_seen": 4137280, + "step": 8415 + }, + { + "epoch": 1.1112577537283885, + "grad_norm": 0.056893620640039444, + "learning_rate": 1.9103751793024236e-06, + "loss": 0.0026, + "num_input_tokens_seen": 4139776, + "step": 8420 + }, + { + "epoch": 1.111917645506137, + "grad_norm": 0.08899568021297455, + "learning_rate": 1.9101844532782357e-06, + "loss": 0.0961, + "num_input_tokens_seen": 4142144, + "step": 8425 + }, + { + "epoch": 1.1125775372838855, + "grad_norm": 0.10331233590841293, + "learning_rate": 1.909993534073989e-06, + "loss": 0.0057, + "num_input_tokens_seen": 4144768, + "step": 8430 + }, + { + "epoch": 1.1132374290616338, + "grad_norm": 0.07069068402051926, + "learning_rate": 1.9098024217302043e-06, + "loss": 0.0725, + "num_input_tokens_seen": 4147008, + "step": 8435 + }, + { + "epoch": 1.1138973208393823, + "grad_norm": 0.03294684365391731, + "learning_rate": 1.909611116287444e-06, + "loss": 0.0589, + "num_input_tokens_seen": 4148992, + "step": 8440 + }, + { + "epoch": 1.1145572126171308, + "grad_norm": 0.02010115422308445, + "learning_rate": 1.909419617786311e-06, + "loss": 0.0708, + "num_input_tokens_seen": 4151552, + "step": 8445 + }, + { + "epoch": 1.1152171043948793, + "grad_norm": 0.12367430329322815, + "learning_rate": 1.90922792626745e-06, + "loss": 0.1095, + "num_input_tokens_seen": 4154176, + "step": 8450 + }, + { + "epoch": 1.1158769961726276, + "grad_norm": 0.40327343344688416, + "learning_rate": 1.9090360417715454e-06, + "loss": 0.1106, + "num_input_tokens_seen": 4156736, + "step": 8455 + }, + { + "epoch": 1.116536887950376, + "grad_norm": 0.05257925018668175, + "learning_rate": 1.9088439643393236e-06, + "loss": 0.0834, + "num_input_tokens_seen": 4158976, + "step": 8460 + }, + { + "epoch": 1.1171967797281246, + "grad_norm": 0.18939876556396484, + "learning_rate": 1.9086516940115518e-06, + "loss": 0.001, + "num_input_tokens_seen": 4161280, + "step": 8465 + }, + { + "epoch": 1.117856671505873, + "grad_norm": 0.05031445994973183, + "learning_rate": 1.908459230829038e-06, + "loss": 0.0794, + "num_input_tokens_seen": 4163776, + "step": 8470 + }, + { + "epoch": 1.1185165632836216, + "grad_norm": 13.425889015197754, + "learning_rate": 1.908266574832631e-06, + "loss": 0.1537, + "num_input_tokens_seen": 4166336, + "step": 8475 + }, + { + "epoch": 1.1191764550613699, + "grad_norm": 0.010042755864560604, + "learning_rate": 1.90807372606322e-06, + "loss": 0.1069, + "num_input_tokens_seen": 4168832, + "step": 8480 + }, + { + "epoch": 1.1198363468391184, + "grad_norm": 0.17297986149787903, + "learning_rate": 1.9078806845617372e-06, + "loss": 0.0022, + "num_input_tokens_seen": 4171520, + "step": 8485 + }, + { + "epoch": 1.1204962386168669, + "grad_norm": 30.04994010925293, + "learning_rate": 1.907687450369153e-06, + "loss": 0.0587, + "num_input_tokens_seen": 4174208, + "step": 8490 + }, + { + "epoch": 1.1211561303946154, + "grad_norm": 2.6952767372131348, + "learning_rate": 1.9074940235264805e-06, + "loss": 0.1138, + "num_input_tokens_seen": 4176512, + "step": 8495 + }, + { + "epoch": 1.1218160221723636, + "grad_norm": 0.12238600105047226, + "learning_rate": 1.9073004040747732e-06, + "loss": 0.0606, + "num_input_tokens_seen": 4179072, + "step": 8500 + }, + { + "epoch": 1.1224759139501121, + "grad_norm": 0.33392786979675293, + "learning_rate": 1.9071065920551254e-06, + "loss": 0.0689, + "num_input_tokens_seen": 4181568, + "step": 8505 + }, + { + "epoch": 1.1231358057278606, + "grad_norm": 33.69342041015625, + "learning_rate": 1.906912587508672e-06, + "loss": 0.0378, + "num_input_tokens_seen": 4184000, + "step": 8510 + }, + { + "epoch": 1.1237956975056091, + "grad_norm": 0.23329313099384308, + "learning_rate": 1.9067183904765893e-06, + "loss": 0.0591, + "num_input_tokens_seen": 4186240, + "step": 8515 + }, + { + "epoch": 1.1244555892833574, + "grad_norm": 0.11479724198579788, + "learning_rate": 1.9065240010000942e-06, + "loss": 0.0016, + "num_input_tokens_seen": 4188544, + "step": 8520 + }, + { + "epoch": 1.125115481061106, + "grad_norm": 0.02210923284292221, + "learning_rate": 1.9063294191204442e-06, + "loss": 0.1241, + "num_input_tokens_seen": 4191168, + "step": 8525 + }, + { + "epoch": 1.1257753728388544, + "grad_norm": 0.056093212217092514, + "learning_rate": 1.9061346448789383e-06, + "loss": 0.2255, + "num_input_tokens_seen": 4193984, + "step": 8530 + }, + { + "epoch": 1.126435264616603, + "grad_norm": 0.026619018986821175, + "learning_rate": 1.9059396783169157e-06, + "loss": 0.001, + "num_input_tokens_seen": 4196928, + "step": 8535 + }, + { + "epoch": 1.1270951563943514, + "grad_norm": 0.1786504089832306, + "learning_rate": 1.9057445194757566e-06, + "loss": 0.0009, + "num_input_tokens_seen": 4199424, + "step": 8540 + }, + { + "epoch": 1.1277550481720997, + "grad_norm": 0.024085398763418198, + "learning_rate": 1.9055491683968822e-06, + "loss": 0.1215, + "num_input_tokens_seen": 4201600, + "step": 8545 + }, + { + "epoch": 1.1284149399498482, + "grad_norm": 0.051518410444259644, + "learning_rate": 1.9053536251217544e-06, + "loss": 0.0429, + "num_input_tokens_seen": 4203968, + "step": 8550 + }, + { + "epoch": 1.1290748317275967, + "grad_norm": 0.07784697413444519, + "learning_rate": 1.9051578896918756e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4206336, + "step": 8555 + }, + { + "epoch": 1.1297347235053452, + "grad_norm": 0.02852809801697731, + "learning_rate": 1.9049619621487894e-06, + "loss": 0.0588, + "num_input_tokens_seen": 4208960, + "step": 8560 + }, + { + "epoch": 1.1303946152830935, + "grad_norm": 58.111297607421875, + "learning_rate": 1.9047658425340798e-06, + "loss": 0.0102, + "num_input_tokens_seen": 4211200, + "step": 8565 + }, + { + "epoch": 1.131054507060842, + "grad_norm": 0.13125121593475342, + "learning_rate": 1.904569530889372e-06, + "loss": 0.0002, + "num_input_tokens_seen": 4213824, + "step": 8570 + }, + { + "epoch": 1.1317143988385905, + "grad_norm": 0.06383427232503891, + "learning_rate": 1.9043730272563319e-06, + "loss": 0.0023, + "num_input_tokens_seen": 4216192, + "step": 8575 + }, + { + "epoch": 1.132374290616339, + "grad_norm": 0.1832188367843628, + "learning_rate": 1.9041763316766653e-06, + "loss": 0.0643, + "num_input_tokens_seen": 4218304, + "step": 8580 + }, + { + "epoch": 1.1330341823940873, + "grad_norm": 0.47467440366744995, + "learning_rate": 1.90397944419212e-06, + "loss": 0.0723, + "num_input_tokens_seen": 4220608, + "step": 8585 + }, + { + "epoch": 1.1336940741718358, + "grad_norm": 46.288822174072266, + "learning_rate": 1.9037823648444839e-06, + "loss": 0.1629, + "num_input_tokens_seen": 4223040, + "step": 8590 + }, + { + "epoch": 1.1343539659495843, + "grad_norm": 0.02950097993016243, + "learning_rate": 1.9035850936755855e-06, + "loss": 0.0002, + "num_input_tokens_seen": 4225664, + "step": 8595 + }, + { + "epoch": 1.1350138577273328, + "grad_norm": 0.01285611279308796, + "learning_rate": 1.9033876307272941e-06, + "loss": 0.1971, + "num_input_tokens_seen": 4228224, + "step": 8600 + }, + { + "epoch": 1.1356737495050813, + "grad_norm": 0.030775396153330803, + "learning_rate": 1.9031899760415198e-06, + "loss": 0.0001, + "num_input_tokens_seen": 4230784, + "step": 8605 + }, + { + "epoch": 1.1363336412828295, + "grad_norm": 13.63304615020752, + "learning_rate": 1.9029921296602139e-06, + "loss": 0.1488, + "num_input_tokens_seen": 4233216, + "step": 8610 + }, + { + "epoch": 1.136993533060578, + "grad_norm": 0.507597804069519, + "learning_rate": 1.9027940916253668e-06, + "loss": 0.0005, + "num_input_tokens_seen": 4235584, + "step": 8615 + }, + { + "epoch": 1.1376534248383265, + "grad_norm": 0.1052989810705185, + "learning_rate": 1.9025958619790118e-06, + "loss": 0.112, + "num_input_tokens_seen": 4237952, + "step": 8620 + }, + { + "epoch": 1.138313316616075, + "grad_norm": 0.05876855179667473, + "learning_rate": 1.902397440763221e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4240768, + "step": 8625 + }, + { + "epoch": 1.1389732083938233, + "grad_norm": 0.09319160133600235, + "learning_rate": 1.9021988280201083e-06, + "loss": 0.2625, + "num_input_tokens_seen": 4243072, + "step": 8630 + }, + { + "epoch": 1.1396331001715718, + "grad_norm": 0.09952009469270706, + "learning_rate": 1.9020000237918273e-06, + "loss": 0.003, + "num_input_tokens_seen": 4245632, + "step": 8635 + }, + { + "epoch": 1.1402929919493203, + "grad_norm": 0.1723238229751587, + "learning_rate": 1.9018010281205727e-06, + "loss": 0.0461, + "num_input_tokens_seen": 4248064, + "step": 8640 + }, + { + "epoch": 1.1409528837270688, + "grad_norm": 0.16310635209083557, + "learning_rate": 1.9016018410485809e-06, + "loss": 0.0676, + "num_input_tokens_seen": 4250496, + "step": 8645 + }, + { + "epoch": 1.141612775504817, + "grad_norm": 17.923866271972656, + "learning_rate": 1.901402462618127e-06, + "loss": 0.3759, + "num_input_tokens_seen": 4253120, + "step": 8650 + }, + { + "epoch": 1.1422726672825656, + "grad_norm": 13.28976821899414, + "learning_rate": 1.9012028928715272e-06, + "loss": 0.2103, + "num_input_tokens_seen": 4255680, + "step": 8655 + }, + { + "epoch": 1.142932559060314, + "grad_norm": 0.06356354802846909, + "learning_rate": 1.9010031318511401e-06, + "loss": 0.0168, + "num_input_tokens_seen": 4257984, + "step": 8660 + }, + { + "epoch": 1.1435924508380626, + "grad_norm": 0.1584162563085556, + "learning_rate": 1.9008031795993627e-06, + "loss": 0.0013, + "num_input_tokens_seen": 4260224, + "step": 8665 + }, + { + "epoch": 1.144252342615811, + "grad_norm": 0.317395955324173, + "learning_rate": 1.9006030361586337e-06, + "loss": 0.0495, + "num_input_tokens_seen": 4262656, + "step": 8670 + }, + { + "epoch": 1.1449122343935594, + "grad_norm": 12.65967082977295, + "learning_rate": 1.9004027015714315e-06, + "loss": 0.0547, + "num_input_tokens_seen": 4264960, + "step": 8675 + }, + { + "epoch": 1.1455721261713079, + "grad_norm": 0.35858747363090515, + "learning_rate": 1.9002021758802762e-06, + "loss": 0.0832, + "num_input_tokens_seen": 4267456, + "step": 8680 + }, + { + "epoch": 1.1462320179490564, + "grad_norm": 0.145728200674057, + "learning_rate": 1.900001459127728e-06, + "loss": 0.0731, + "num_input_tokens_seen": 4269760, + "step": 8685 + }, + { + "epoch": 1.1468919097268049, + "grad_norm": 78.33462524414062, + "learning_rate": 1.8998005513563872e-06, + "loss": 0.031, + "num_input_tokens_seen": 4272384, + "step": 8690 + }, + { + "epoch": 1.1475518015045532, + "grad_norm": 0.28123587369918823, + "learning_rate": 1.8995994526088955e-06, + "loss": 0.0028, + "num_input_tokens_seen": 4275008, + "step": 8695 + }, + { + "epoch": 1.1482116932823017, + "grad_norm": 0.04340027645230293, + "learning_rate": 1.8993981629279342e-06, + "loss": 0.014, + "num_input_tokens_seen": 4277440, + "step": 8700 + }, + { + "epoch": 1.1488715850600502, + "grad_norm": 0.034418463706970215, + "learning_rate": 1.8991966823562258e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4279744, + "step": 8705 + }, + { + "epoch": 1.1495314768377987, + "grad_norm": 162.60955810546875, + "learning_rate": 1.8989950109365328e-06, + "loss": 0.1334, + "num_input_tokens_seen": 4282048, + "step": 8710 + }, + { + "epoch": 1.150191368615547, + "grad_norm": 0.05149710550904274, + "learning_rate": 1.8987931487116591e-06, + "loss": 0.0581, + "num_input_tokens_seen": 4284288, + "step": 8715 + }, + { + "epoch": 1.1508512603932954, + "grad_norm": 0.03445148840546608, + "learning_rate": 1.898591095724448e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4286976, + "step": 8720 + }, + { + "epoch": 1.151511152171044, + "grad_norm": 0.014576703310012817, + "learning_rate": 1.898388852017784e-06, + "loss": 0.0615, + "num_input_tokens_seen": 4289472, + "step": 8725 + }, + { + "epoch": 1.1521710439487924, + "grad_norm": 0.027082012966275215, + "learning_rate": 1.8981864176345914e-06, + "loss": 0.0752, + "num_input_tokens_seen": 4292160, + "step": 8730 + }, + { + "epoch": 1.152830935726541, + "grad_norm": 18.13522720336914, + "learning_rate": 1.8979837926178362e-06, + "loss": 0.1534, + "num_input_tokens_seen": 4294528, + "step": 8735 + }, + { + "epoch": 1.1534908275042892, + "grad_norm": 0.006116439588367939, + "learning_rate": 1.8977809770105235e-06, + "loss": 0.0736, + "num_input_tokens_seen": 4297152, + "step": 8740 + }, + { + "epoch": 1.1541507192820377, + "grad_norm": 8.04682731628418, + "learning_rate": 1.8975779708556998e-06, + "loss": 0.0011, + "num_input_tokens_seen": 4299968, + "step": 8745 + }, + { + "epoch": 1.1548106110597862, + "grad_norm": 102.7171401977539, + "learning_rate": 1.8973747741964515e-06, + "loss": 0.0063, + "num_input_tokens_seen": 4302144, + "step": 8750 + }, + { + "epoch": 1.1554705028375347, + "grad_norm": 0.03771144151687622, + "learning_rate": 1.8971713870759057e-06, + "loss": 0.0892, + "num_input_tokens_seen": 4304576, + "step": 8755 + }, + { + "epoch": 1.156130394615283, + "grad_norm": 37.372928619384766, + "learning_rate": 1.8969678095372296e-06, + "loss": 0.0785, + "num_input_tokens_seen": 4306816, + "step": 8760 + }, + { + "epoch": 1.1567902863930315, + "grad_norm": 24.924997329711914, + "learning_rate": 1.8967640416236313e-06, + "loss": 0.1083, + "num_input_tokens_seen": 4309440, + "step": 8765 + }, + { + "epoch": 1.15745017817078, + "grad_norm": 0.10889468342065811, + "learning_rate": 1.8965600833783594e-06, + "loss": 0.121, + "num_input_tokens_seen": 4312000, + "step": 8770 + }, + { + "epoch": 1.1581100699485285, + "grad_norm": 36.86738586425781, + "learning_rate": 1.8963559348447015e-06, + "loss": 0.1331, + "num_input_tokens_seen": 4314432, + "step": 8775 + }, + { + "epoch": 1.1587699617262768, + "grad_norm": 6.531122207641602, + "learning_rate": 1.8961515960659878e-06, + "loss": 0.0048, + "num_input_tokens_seen": 4316928, + "step": 8780 + }, + { + "epoch": 1.1594298535040253, + "grad_norm": 122.35628509521484, + "learning_rate": 1.8959470670855873e-06, + "loss": 0.1, + "num_input_tokens_seen": 4319168, + "step": 8785 + }, + { + "epoch": 1.1600897452817738, + "grad_norm": 15.543755531311035, + "learning_rate": 1.8957423479469095e-06, + "loss": 0.1709, + "num_input_tokens_seen": 4321344, + "step": 8790 + }, + { + "epoch": 1.1607496370595223, + "grad_norm": 116.81075286865234, + "learning_rate": 1.8955374386934049e-06, + "loss": 0.0571, + "num_input_tokens_seen": 4323904, + "step": 8795 + }, + { + "epoch": 1.1614095288372708, + "grad_norm": 0.28201350569725037, + "learning_rate": 1.895332339368564e-06, + "loss": 0.0995, + "num_input_tokens_seen": 4326272, + "step": 8800 + }, + { + "epoch": 1.162069420615019, + "grad_norm": 0.05971250683069229, + "learning_rate": 1.8951270500159176e-06, + "loss": 0.0573, + "num_input_tokens_seen": 4329024, + "step": 8805 + }, + { + "epoch": 1.1627293123927676, + "grad_norm": 15.558816909790039, + "learning_rate": 1.8949215706790364e-06, + "loss": 0.0971, + "num_input_tokens_seen": 4331328, + "step": 8810 + }, + { + "epoch": 1.163389204170516, + "grad_norm": 109.42797088623047, + "learning_rate": 1.8947159014015326e-06, + "loss": 0.1817, + "num_input_tokens_seen": 4333696, + "step": 8815 + }, + { + "epoch": 1.1640490959482646, + "grad_norm": 0.2136552333831787, + "learning_rate": 1.8945100422270578e-06, + "loss": 0.1063, + "num_input_tokens_seen": 4336320, + "step": 8820 + }, + { + "epoch": 1.164708987726013, + "grad_norm": 2.6217916011810303, + "learning_rate": 1.8943039931993043e-06, + "loss": 0.0024, + "num_input_tokens_seen": 4338688, + "step": 8825 + }, + { + "epoch": 1.1653688795037613, + "grad_norm": 13.55349063873291, + "learning_rate": 1.8940977543620038e-06, + "loss": 0.078, + "num_input_tokens_seen": 4341312, + "step": 8830 + }, + { + "epoch": 1.1660287712815098, + "grad_norm": 0.13286477327346802, + "learning_rate": 1.89389132575893e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4343808, + "step": 8835 + }, + { + "epoch": 1.1666886630592583, + "grad_norm": 31.39531707763672, + "learning_rate": 1.8936847074338948e-06, + "loss": 0.0904, + "num_input_tokens_seen": 4346176, + "step": 8840 + }, + { + "epoch": 1.1673485548370066, + "grad_norm": 0.0839921236038208, + "learning_rate": 1.8934778994307526e-06, + "loss": 0.0682, + "num_input_tokens_seen": 4348672, + "step": 8845 + }, + { + "epoch": 1.1680084466147551, + "grad_norm": 0.012714563868939877, + "learning_rate": 1.8932709017933958e-06, + "loss": 0.1466, + "num_input_tokens_seen": 4350976, + "step": 8850 + }, + { + "epoch": 1.1686683383925036, + "grad_norm": 0.12429999560117722, + "learning_rate": 1.8930637145657592e-06, + "loss": 0.0004, + "num_input_tokens_seen": 4353536, + "step": 8855 + }, + { + "epoch": 1.1693282301702521, + "grad_norm": 0.08413698524236679, + "learning_rate": 1.8928563377918157e-06, + "loss": 0.1143, + "num_input_tokens_seen": 4355712, + "step": 8860 + }, + { + "epoch": 1.1699881219480006, + "grad_norm": 0.050988439470529556, + "learning_rate": 1.8926487715155802e-06, + "loss": 0.0635, + "num_input_tokens_seen": 4358336, + "step": 8865 + }, + { + "epoch": 1.170648013725749, + "grad_norm": 1.0589361190795898, + "learning_rate": 1.892441015781107e-06, + "loss": 0.0941, + "num_input_tokens_seen": 4360896, + "step": 8870 + }, + { + "epoch": 1.1713079055034974, + "grad_norm": 0.09044911712408066, + "learning_rate": 1.892233070632491e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4363456, + "step": 8875 + }, + { + "epoch": 1.171967797281246, + "grad_norm": 14.525789260864258, + "learning_rate": 1.8920249361138665e-06, + "loss": 0.1365, + "num_input_tokens_seen": 4365760, + "step": 8880 + }, + { + "epoch": 1.1726276890589944, + "grad_norm": 0.1123279482126236, + "learning_rate": 1.891816612269409e-06, + "loss": 0.0024, + "num_input_tokens_seen": 4368192, + "step": 8885 + }, + { + "epoch": 1.173287580836743, + "grad_norm": 0.02701779454946518, + "learning_rate": 1.8916080991433337e-06, + "loss": 0.0928, + "num_input_tokens_seen": 4370752, + "step": 8890 + }, + { + "epoch": 1.1739474726144912, + "grad_norm": 0.06008841097354889, + "learning_rate": 1.8913993967798956e-06, + "loss": 0.0007, + "num_input_tokens_seen": 4373376, + "step": 8895 + }, + { + "epoch": 1.1746073643922397, + "grad_norm": 0.14545901119709015, + "learning_rate": 1.8911905052233905e-06, + "loss": 0.0492, + "num_input_tokens_seen": 4376000, + "step": 8900 + }, + { + "epoch": 1.1752672561699882, + "grad_norm": 31.391544342041016, + "learning_rate": 1.8909814245181543e-06, + "loss": 0.1225, + "num_input_tokens_seen": 4378496, + "step": 8905 + }, + { + "epoch": 1.1759271479477365, + "grad_norm": 11.671570777893066, + "learning_rate": 1.890772154708563e-06, + "loss": 0.0383, + "num_input_tokens_seen": 4380672, + "step": 8910 + }, + { + "epoch": 1.176587039725485, + "grad_norm": 18.109888076782227, + "learning_rate": 1.8905626958390317e-06, + "loss": 0.1929, + "num_input_tokens_seen": 4383168, + "step": 8915 + }, + { + "epoch": 1.1772469315032335, + "grad_norm": 0.048839978873729706, + "learning_rate": 1.8903530479540176e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4385472, + "step": 8920 + }, + { + "epoch": 1.177906823280982, + "grad_norm": 0.05667317286133766, + "learning_rate": 1.8901432110980164e-06, + "loss": 0.0004, + "num_input_tokens_seen": 4387840, + "step": 8925 + }, + { + "epoch": 1.1785667150587305, + "grad_norm": 12.719619750976562, + "learning_rate": 1.8899331853155648e-06, + "loss": 0.1419, + "num_input_tokens_seen": 4390336, + "step": 8930 + }, + { + "epoch": 1.1792266068364787, + "grad_norm": 11.942977905273438, + "learning_rate": 1.8897229706512387e-06, + "loss": 0.099, + "num_input_tokens_seen": 4392640, + "step": 8935 + }, + { + "epoch": 1.1798864986142272, + "grad_norm": 0.24283498525619507, + "learning_rate": 1.889512567149655e-06, + "loss": 0.1339, + "num_input_tokens_seen": 4395136, + "step": 8940 + }, + { + "epoch": 1.1805463903919757, + "grad_norm": 0.12784871459007263, + "learning_rate": 1.88930197485547e-06, + "loss": 0.0047, + "num_input_tokens_seen": 4397504, + "step": 8945 + }, + { + "epoch": 1.1812062821697242, + "grad_norm": 4.140740871429443, + "learning_rate": 1.8890911938133814e-06, + "loss": 0.0047, + "num_input_tokens_seen": 4399872, + "step": 8950 + }, + { + "epoch": 1.1818661739474727, + "grad_norm": 0.3176402151584625, + "learning_rate": 1.8888802240681248e-06, + "loss": 0.0867, + "num_input_tokens_seen": 4402048, + "step": 8955 + }, + { + "epoch": 1.182526065725221, + "grad_norm": 47.93214797973633, + "learning_rate": 1.888669065664477e-06, + "loss": 0.003, + "num_input_tokens_seen": 4404416, + "step": 8960 + }, + { + "epoch": 1.1831859575029695, + "grad_norm": 0.04862954467535019, + "learning_rate": 1.8884577186472557e-06, + "loss": 0.0207, + "num_input_tokens_seen": 4406720, + "step": 8965 + }, + { + "epoch": 1.183845849280718, + "grad_norm": 0.016081402078270912, + "learning_rate": 1.8882461830613173e-06, + "loss": 0.1395, + "num_input_tokens_seen": 4408896, + "step": 8970 + }, + { + "epoch": 1.1845057410584663, + "grad_norm": 0.04223987087607384, + "learning_rate": 1.8880344589515587e-06, + "loss": 0.0004, + "num_input_tokens_seen": 4411392, + "step": 8975 + }, + { + "epoch": 1.1851656328362148, + "grad_norm": 0.0231131874024868, + "learning_rate": 1.887822546362917e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4413888, + "step": 8980 + }, + { + "epoch": 1.1858255246139633, + "grad_norm": 0.03339609131217003, + "learning_rate": 1.8876104453403686e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4416384, + "step": 8985 + }, + { + "epoch": 1.1864854163917118, + "grad_norm": 0.1902741938829422, + "learning_rate": 1.8873981559289308e-06, + "loss": 0.21, + "num_input_tokens_seen": 4419136, + "step": 8990 + }, + { + "epoch": 1.1871453081694603, + "grad_norm": 0.2967088520526886, + "learning_rate": 1.8871856781736604e-06, + "loss": 0.0786, + "num_input_tokens_seen": 4421632, + "step": 8995 + }, + { + "epoch": 1.1878051999472086, + "grad_norm": 0.09986329078674316, + "learning_rate": 1.8869730121196542e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4424320, + "step": 9000 + }, + { + "epoch": 1.188465091724957, + "grad_norm": 17.65159034729004, + "learning_rate": 1.8867601578120495e-06, + "loss": 0.1348, + "num_input_tokens_seen": 4426880, + "step": 9005 + }, + { + "epoch": 1.1891249835027056, + "grad_norm": 0.12928028404712677, + "learning_rate": 1.8865471152960225e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4429376, + "step": 9010 + }, + { + "epoch": 1.189784875280454, + "grad_norm": 0.02794799394905567, + "learning_rate": 1.8863338846167905e-06, + "loss": 0.1167, + "num_input_tokens_seen": 4432064, + "step": 9015 + }, + { + "epoch": 1.1904447670582026, + "grad_norm": 0.15142269432544708, + "learning_rate": 1.8861204658196095e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4434304, + "step": 9020 + }, + { + "epoch": 1.1911046588359508, + "grad_norm": 0.18032433092594147, + "learning_rate": 1.8859068589497765e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4436544, + "step": 9025 + }, + { + "epoch": 1.1917645506136993, + "grad_norm": 0.06488180160522461, + "learning_rate": 1.8856930640526277e-06, + "loss": 0.0947, + "num_input_tokens_seen": 4439168, + "step": 9030 + }, + { + "epoch": 1.1924244423914478, + "grad_norm": 0.04269060865044594, + "learning_rate": 1.88547908117354e-06, + "loss": 0.0814, + "num_input_tokens_seen": 4441472, + "step": 9035 + }, + { + "epoch": 1.1930843341691963, + "grad_norm": 0.12657848000526428, + "learning_rate": 1.8852649103579292e-06, + "loss": 0.2709, + "num_input_tokens_seen": 4443840, + "step": 9040 + }, + { + "epoch": 1.1937442259469446, + "grad_norm": 0.024857914075255394, + "learning_rate": 1.885050551651252e-06, + "loss": 0.0662, + "num_input_tokens_seen": 4446016, + "step": 9045 + }, + { + "epoch": 1.1944041177246931, + "grad_norm": 2.7682478427886963, + "learning_rate": 1.8848360050990042e-06, + "loss": 0.2496, + "num_input_tokens_seen": 4448320, + "step": 9050 + }, + { + "epoch": 1.1950640095024416, + "grad_norm": 0.15781430900096893, + "learning_rate": 1.8846212707467216e-06, + "loss": 0.0971, + "num_input_tokens_seen": 4450880, + "step": 9055 + }, + { + "epoch": 1.1957239012801901, + "grad_norm": 0.2587769329547882, + "learning_rate": 1.8844063486399805e-06, + "loss": 0.002, + "num_input_tokens_seen": 4453824, + "step": 9060 + }, + { + "epoch": 1.1963837930579384, + "grad_norm": 29.7452392578125, + "learning_rate": 1.884191238824396e-06, + "loss": 0.1786, + "num_input_tokens_seen": 4456448, + "step": 9065 + }, + { + "epoch": 1.197043684835687, + "grad_norm": 0.1104031577706337, + "learning_rate": 1.883975941345624e-06, + "loss": 0.1482, + "num_input_tokens_seen": 4458880, + "step": 9070 + }, + { + "epoch": 1.1977035766134354, + "grad_norm": 11.88070011138916, + "learning_rate": 1.8837604562493597e-06, + "loss": 0.1391, + "num_input_tokens_seen": 4461504, + "step": 9075 + }, + { + "epoch": 1.198363468391184, + "grad_norm": 35.3869514465332, + "learning_rate": 1.883544783581338e-06, + "loss": 0.1583, + "num_input_tokens_seen": 4464000, + "step": 9080 + }, + { + "epoch": 1.1990233601689324, + "grad_norm": 0.23158396780490875, + "learning_rate": 1.8833289233873346e-06, + "loss": 0.0736, + "num_input_tokens_seen": 4466368, + "step": 9085 + }, + { + "epoch": 1.1996832519466807, + "grad_norm": 159.66079711914062, + "learning_rate": 1.8831128757131634e-06, + "loss": 0.1445, + "num_input_tokens_seen": 4468800, + "step": 9090 + }, + { + "epoch": 1.2003431437244292, + "grad_norm": 0.3382391035556793, + "learning_rate": 1.8828966406046796e-06, + "loss": 0.1592, + "num_input_tokens_seen": 4471296, + "step": 9095 + }, + { + "epoch": 1.2010030355021777, + "grad_norm": 0.09924346208572388, + "learning_rate": 1.8826802181077771e-06, + "loss": 0.0024, + "num_input_tokens_seen": 4473856, + "step": 9100 + }, + { + "epoch": 1.2016629272799262, + "grad_norm": 0.3284415304660797, + "learning_rate": 1.8824636082683903e-06, + "loss": 0.0028, + "num_input_tokens_seen": 4476416, + "step": 9105 + }, + { + "epoch": 1.2023228190576745, + "grad_norm": 0.0883711501955986, + "learning_rate": 1.8822468111324927e-06, + "loss": 0.044, + "num_input_tokens_seen": 4478848, + "step": 9110 + }, + { + "epoch": 1.202982710835423, + "grad_norm": 13.112217903137207, + "learning_rate": 1.8820298267460983e-06, + "loss": 0.1416, + "num_input_tokens_seen": 4481088, + "step": 9115 + }, + { + "epoch": 1.2036426026131715, + "grad_norm": 13.190703392028809, + "learning_rate": 1.8818126551552605e-06, + "loss": 0.0922, + "num_input_tokens_seen": 4483392, + "step": 9120 + }, + { + "epoch": 1.20430249439092, + "grad_norm": 12.654837608337402, + "learning_rate": 1.881595296406072e-06, + "loss": 0.1309, + "num_input_tokens_seen": 4485760, + "step": 9125 + }, + { + "epoch": 1.2049623861686682, + "grad_norm": 0.05298277735710144, + "learning_rate": 1.881377750544666e-06, + "loss": 0.0023, + "num_input_tokens_seen": 4488064, + "step": 9130 + }, + { + "epoch": 1.2056222779464167, + "grad_norm": 91.60076904296875, + "learning_rate": 1.8811600176172147e-06, + "loss": 0.0897, + "num_input_tokens_seen": 4490048, + "step": 9135 + }, + { + "epoch": 1.2062821697241652, + "grad_norm": 0.06908053904771805, + "learning_rate": 1.8809420976699308e-06, + "loss": 0.0015, + "num_input_tokens_seen": 4492480, + "step": 9140 + }, + { + "epoch": 1.2069420615019137, + "grad_norm": 0.49360230565071106, + "learning_rate": 1.8807239907490656e-06, + "loss": 0.0265, + "num_input_tokens_seen": 4495104, + "step": 9145 + }, + { + "epoch": 1.2076019532796622, + "grad_norm": 0.07297612726688385, + "learning_rate": 1.8805056969009114e-06, + "loss": 0.2775, + "num_input_tokens_seen": 4497408, + "step": 9150 + }, + { + "epoch": 1.2082618450574105, + "grad_norm": 0.08615783601999283, + "learning_rate": 1.8802872161717988e-06, + "loss": 0.0605, + "num_input_tokens_seen": 4499584, + "step": 9155 + }, + { + "epoch": 1.208921736835159, + "grad_norm": 0.13035784661769867, + "learning_rate": 1.8800685486080994e-06, + "loss": 0.0287, + "num_input_tokens_seen": 4502336, + "step": 9160 + }, + { + "epoch": 1.2095816286129075, + "grad_norm": 0.030498240143060684, + "learning_rate": 1.8798496942562235e-06, + "loss": 0.0638, + "num_input_tokens_seen": 4505024, + "step": 9165 + }, + { + "epoch": 1.210241520390656, + "grad_norm": 14.509490013122559, + "learning_rate": 1.879630653162621e-06, + "loss": 0.0649, + "num_input_tokens_seen": 4507776, + "step": 9170 + }, + { + "epoch": 1.2109014121684043, + "grad_norm": 16.562963485717773, + "learning_rate": 1.8794114253737825e-06, + "loss": 0.0745, + "num_input_tokens_seen": 4510144, + "step": 9175 + }, + { + "epoch": 1.2115613039461528, + "grad_norm": 0.2812765836715698, + "learning_rate": 1.8791920109362373e-06, + "loss": 0.0387, + "num_input_tokens_seen": 4512704, + "step": 9180 + }, + { + "epoch": 1.2122211957239013, + "grad_norm": 0.5217291712760925, + "learning_rate": 1.878972409896554e-06, + "loss": 0.0823, + "num_input_tokens_seen": 4515264, + "step": 9185 + }, + { + "epoch": 1.2128810875016498, + "grad_norm": 0.06650111079216003, + "learning_rate": 1.878752622301342e-06, + "loss": 0.0804, + "num_input_tokens_seen": 4518016, + "step": 9190 + }, + { + "epoch": 1.213540979279398, + "grad_norm": 0.028405936434864998, + "learning_rate": 1.8785326481972491e-06, + "loss": 0.0554, + "num_input_tokens_seen": 4520320, + "step": 9195 + }, + { + "epoch": 1.2142008710571466, + "grad_norm": 15.38084602355957, + "learning_rate": 1.8783124876309637e-06, + "loss": 0.0832, + "num_input_tokens_seen": 4522432, + "step": 9200 + }, + { + "epoch": 1.214860762834895, + "grad_norm": 0.029807792976498604, + "learning_rate": 1.878092140649213e-06, + "loss": 0.0009, + "num_input_tokens_seen": 4524864, + "step": 9205 + }, + { + "epoch": 1.2155206546126436, + "grad_norm": 0.27122005820274353, + "learning_rate": 1.8778716072987638e-06, + "loss": 0.0041, + "num_input_tokens_seen": 4527296, + "step": 9210 + }, + { + "epoch": 1.216180546390392, + "grad_norm": 0.07015712559223175, + "learning_rate": 1.8776508876264235e-06, + "loss": 0.1339, + "num_input_tokens_seen": 4529792, + "step": 9215 + }, + { + "epoch": 1.2168404381681404, + "grad_norm": 0.8073084354400635, + "learning_rate": 1.8774299816790373e-06, + "loss": 0.0356, + "num_input_tokens_seen": 4532288, + "step": 9220 + }, + { + "epoch": 1.2175003299458889, + "grad_norm": 18.83184051513672, + "learning_rate": 1.8772088895034916e-06, + "loss": 0.0769, + "num_input_tokens_seen": 4534848, + "step": 9225 + }, + { + "epoch": 1.2181602217236374, + "grad_norm": 0.01886264607310295, + "learning_rate": 1.876987611146711e-06, + "loss": 0.0675, + "num_input_tokens_seen": 4537280, + "step": 9230 + }, + { + "epoch": 1.2188201135013859, + "grad_norm": 0.6087026000022888, + "learning_rate": 1.876766146655661e-06, + "loss": 0.158, + "num_input_tokens_seen": 4539776, + "step": 9235 + }, + { + "epoch": 1.2194800052791341, + "grad_norm": 0.8865079879760742, + "learning_rate": 1.8765444960773453e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4542144, + "step": 9240 + }, + { + "epoch": 1.2201398970568826, + "grad_norm": 0.08013620227575302, + "learning_rate": 1.8763226594588078e-06, + "loss": 0.0557, + "num_input_tokens_seen": 4544576, + "step": 9245 + }, + { + "epoch": 1.2207997888346311, + "grad_norm": 0.12226373702287674, + "learning_rate": 1.8761006368471315e-06, + "loss": 0.0475, + "num_input_tokens_seen": 4547264, + "step": 9250 + }, + { + "epoch": 1.2214596806123796, + "grad_norm": 0.0244253259152174, + "learning_rate": 1.8758784282894394e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4549696, + "step": 9255 + }, + { + "epoch": 1.222119572390128, + "grad_norm": 0.04465080052614212, + "learning_rate": 1.8756560338328934e-06, + "loss": 0.0956, + "num_input_tokens_seen": 4552000, + "step": 9260 + }, + { + "epoch": 1.2227794641678764, + "grad_norm": 0.03824853524565697, + "learning_rate": 1.8754334535246952e-06, + "loss": 0.0492, + "num_input_tokens_seen": 4554624, + "step": 9265 + }, + { + "epoch": 1.223439355945625, + "grad_norm": 11.81065559387207, + "learning_rate": 1.875210687412086e-06, + "loss": 0.0658, + "num_input_tokens_seen": 4557056, + "step": 9270 + }, + { + "epoch": 1.2240992477233734, + "grad_norm": 21.275474548339844, + "learning_rate": 1.874987735542346e-06, + "loss": 0.0784, + "num_input_tokens_seen": 4559488, + "step": 9275 + }, + { + "epoch": 1.224759139501122, + "grad_norm": 0.07805287837982178, + "learning_rate": 1.8747645979627955e-06, + "loss": 0.1279, + "num_input_tokens_seen": 4561984, + "step": 9280 + }, + { + "epoch": 1.2254190312788702, + "grad_norm": 0.07274620980024338, + "learning_rate": 1.8745412747207933e-06, + "loss": 0.0544, + "num_input_tokens_seen": 4564736, + "step": 9285 + }, + { + "epoch": 1.2260789230566187, + "grad_norm": 0.25491011142730713, + "learning_rate": 1.8743177658637387e-06, + "loss": 0.0699, + "num_input_tokens_seen": 4567232, + "step": 9290 + }, + { + "epoch": 1.2267388148343672, + "grad_norm": 69.74526977539062, + "learning_rate": 1.8740940714390697e-06, + "loss": 0.1415, + "num_input_tokens_seen": 4569664, + "step": 9295 + }, + { + "epoch": 1.2273987066121157, + "grad_norm": 0.05679222196340561, + "learning_rate": 1.8738701914942636e-06, + "loss": 0.0011, + "num_input_tokens_seen": 4572096, + "step": 9300 + }, + { + "epoch": 1.228058598389864, + "grad_norm": 0.05214114487171173, + "learning_rate": 1.8736461260768375e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4574528, + "step": 9305 + }, + { + "epoch": 1.2287184901676125, + "grad_norm": 0.25871649384498596, + "learning_rate": 1.8734218752343475e-06, + "loss": 0.094, + "num_input_tokens_seen": 4577088, + "step": 9310 + }, + { + "epoch": 1.229378381945361, + "grad_norm": 0.01494324766099453, + "learning_rate": 1.8731974390143894e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4579456, + "step": 9315 + }, + { + "epoch": 1.2300382737231095, + "grad_norm": 49.50048828125, + "learning_rate": 1.872972817464598e-06, + "loss": 0.3725, + "num_input_tokens_seen": 4581824, + "step": 9320 + }, + { + "epoch": 1.2306981655008578, + "grad_norm": 0.17533209919929504, + "learning_rate": 1.8727480106326476e-06, + "loss": 0.0324, + "num_input_tokens_seen": 4584256, + "step": 9325 + }, + { + "epoch": 1.2313580572786063, + "grad_norm": 32.87931823730469, + "learning_rate": 1.872523018566252e-06, + "loss": 0.1018, + "num_input_tokens_seen": 4587008, + "step": 9330 + }, + { + "epoch": 1.2320179490563548, + "grad_norm": 0.40437552332878113, + "learning_rate": 1.8722978413131641e-06, + "loss": 0.1404, + "num_input_tokens_seen": 4589824, + "step": 9335 + }, + { + "epoch": 1.2326778408341033, + "grad_norm": 53.637935638427734, + "learning_rate": 1.8720724789211758e-06, + "loss": 0.1816, + "num_input_tokens_seen": 4592448, + "step": 9340 + }, + { + "epoch": 1.2333377326118518, + "grad_norm": 0.4547289311885834, + "learning_rate": 1.871846931438119e-06, + "loss": 0.0031, + "num_input_tokens_seen": 4594880, + "step": 9345 + }, + { + "epoch": 1.2339976243896, + "grad_norm": 59.51481246948242, + "learning_rate": 1.8716211989118645e-06, + "loss": 0.4028, + "num_input_tokens_seen": 4597120, + "step": 9350 + }, + { + "epoch": 1.2346575161673485, + "grad_norm": 16.716562271118164, + "learning_rate": 1.8713952813903222e-06, + "loss": 0.1788, + "num_input_tokens_seen": 4599552, + "step": 9355 + }, + { + "epoch": 1.235317407945097, + "grad_norm": 0.6430609822273254, + "learning_rate": 1.8711691789214416e-06, + "loss": 0.0456, + "num_input_tokens_seen": 4602048, + "step": 9360 + }, + { + "epoch": 1.2359772997228455, + "grad_norm": 0.11246831715106964, + "learning_rate": 1.8709428915532114e-06, + "loss": 0.0675, + "num_input_tokens_seen": 4604352, + "step": 9365 + }, + { + "epoch": 1.2366371915005938, + "grad_norm": 0.9561209678649902, + "learning_rate": 1.8707164193336595e-06, + "loss": 0.1284, + "num_input_tokens_seen": 4606720, + "step": 9370 + }, + { + "epoch": 1.2372970832783423, + "grad_norm": 12.950846672058105, + "learning_rate": 1.8704897623108527e-06, + "loss": 0.1365, + "num_input_tokens_seen": 4609088, + "step": 9375 + }, + { + "epoch": 1.2379569750560908, + "grad_norm": 34.06569290161133, + "learning_rate": 1.8702629205328973e-06, + "loss": 0.0896, + "num_input_tokens_seen": 4611712, + "step": 9380 + }, + { + "epoch": 1.2386168668338393, + "grad_norm": 0.2726440131664276, + "learning_rate": 1.8700358940479387e-06, + "loss": 0.1082, + "num_input_tokens_seen": 4614080, + "step": 9385 + }, + { + "epoch": 1.2392767586115876, + "grad_norm": 0.19074247777462006, + "learning_rate": 1.8698086829041624e-06, + "loss": 0.0484, + "num_input_tokens_seen": 4616256, + "step": 9390 + }, + { + "epoch": 1.239936650389336, + "grad_norm": 6.18501615524292, + "learning_rate": 1.8695812871497915e-06, + "loss": 0.0028, + "num_input_tokens_seen": 4618688, + "step": 9395 + }, + { + "epoch": 1.2405965421670846, + "grad_norm": 18.376789093017578, + "learning_rate": 1.8693537068330898e-06, + "loss": 0.1368, + "num_input_tokens_seen": 4621184, + "step": 9400 + }, + { + "epoch": 1.241256433944833, + "grad_norm": 67.07162475585938, + "learning_rate": 1.8691259420023589e-06, + "loss": 0.189, + "num_input_tokens_seen": 4623616, + "step": 9405 + }, + { + "epoch": 1.2419163257225816, + "grad_norm": 0.06652729958295822, + "learning_rate": 1.8688979927059405e-06, + "loss": 0.0022, + "num_input_tokens_seen": 4626048, + "step": 9410 + }, + { + "epoch": 1.2425762175003299, + "grad_norm": 20.42545509338379, + "learning_rate": 1.8686698589922154e-06, + "loss": 0.2181, + "num_input_tokens_seen": 4628544, + "step": 9415 + }, + { + "epoch": 1.2432361092780784, + "grad_norm": 0.010046327486634254, + "learning_rate": 1.868441540909603e-06, + "loss": 0.0009, + "num_input_tokens_seen": 4631296, + "step": 9420 + }, + { + "epoch": 1.2438960010558269, + "grad_norm": 0.02093764953315258, + "learning_rate": 1.8682130385065622e-06, + "loss": 0.0018, + "num_input_tokens_seen": 4633664, + "step": 9425 + }, + { + "epoch": 1.2445558928335754, + "grad_norm": 0.21397200226783752, + "learning_rate": 1.8679843518315911e-06, + "loss": 0.001, + "num_input_tokens_seen": 4636224, + "step": 9430 + }, + { + "epoch": 1.2452157846113237, + "grad_norm": 0.056055840104818344, + "learning_rate": 1.8677554809332272e-06, + "loss": 0.0792, + "num_input_tokens_seen": 4638720, + "step": 9435 + }, + { + "epoch": 1.2458756763890722, + "grad_norm": 0.04379117116332054, + "learning_rate": 1.8675264258600459e-06, + "loss": 0.0399, + "num_input_tokens_seen": 4641280, + "step": 9440 + }, + { + "epoch": 1.2465355681668207, + "grad_norm": 0.05579795688390732, + "learning_rate": 1.8672971866606627e-06, + "loss": 0.0117, + "num_input_tokens_seen": 4643648, + "step": 9445 + }, + { + "epoch": 1.2471954599445692, + "grad_norm": 0.11813732236623764, + "learning_rate": 1.8670677633837321e-06, + "loss": 0.0879, + "num_input_tokens_seen": 4646016, + "step": 9450 + }, + { + "epoch": 1.2478553517223174, + "grad_norm": 19.978891372680664, + "learning_rate": 1.8668381560779478e-06, + "loss": 0.2114, + "num_input_tokens_seen": 4648320, + "step": 9455 + }, + { + "epoch": 1.248515243500066, + "grad_norm": 2.6825110912323, + "learning_rate": 1.866608364792042e-06, + "loss": 0.0495, + "num_input_tokens_seen": 4650944, + "step": 9460 + }, + { + "epoch": 1.2491751352778144, + "grad_norm": 0.8223278522491455, + "learning_rate": 1.8663783895747863e-06, + "loss": 0.0025, + "num_input_tokens_seen": 4653440, + "step": 9465 + }, + { + "epoch": 1.249835027055563, + "grad_norm": 22.685306549072266, + "learning_rate": 1.8661482304749911e-06, + "loss": 0.1382, + "num_input_tokens_seen": 4656064, + "step": 9470 + }, + { + "epoch": 1.2504949188333114, + "grad_norm": 0.029461894184350967, + "learning_rate": 1.8659178875415062e-06, + "loss": 0.1179, + "num_input_tokens_seen": 4658240, + "step": 9475 + }, + { + "epoch": 1.2504949188333114, + "eval_loss": 0.11660958081483841, + "eval_runtime": 7.8187, + "eval_samples_per_second": 861.395, + "eval_steps_per_second": 107.69, + "num_input_tokens_seen": 4658240, + "step": 9475 + }, + { + "epoch": 1.2511548106110597, + "grad_norm": 0.46132540702819824, + "learning_rate": 1.86568736082322e-06, + "loss": 0.001, + "num_input_tokens_seen": 4660992, + "step": 9480 + }, + { + "epoch": 1.2518147023888082, + "grad_norm": 0.01598983258008957, + "learning_rate": 1.8654566503690606e-06, + "loss": 0.0584, + "num_input_tokens_seen": 4663488, + "step": 9485 + }, + { + "epoch": 1.2524745941665567, + "grad_norm": 0.13625356554985046, + "learning_rate": 1.8652257562279942e-06, + "loss": 0.0698, + "num_input_tokens_seen": 4666112, + "step": 9490 + }, + { + "epoch": 1.2531344859443052, + "grad_norm": 325.95355224609375, + "learning_rate": 1.864994678449026e-06, + "loss": 0.1508, + "num_input_tokens_seen": 4668992, + "step": 9495 + }, + { + "epoch": 1.2537943777220537, + "grad_norm": 42.81782150268555, + "learning_rate": 1.864763417081202e-06, + "loss": 0.1196, + "num_input_tokens_seen": 4671168, + "step": 9500 + }, + { + "epoch": 1.254454269499802, + "grad_norm": 0.0359887033700943, + "learning_rate": 1.864531972173604e-06, + "loss": 0.0589, + "num_input_tokens_seen": 4673600, + "step": 9505 + }, + { + "epoch": 1.2551141612775505, + "grad_norm": 0.061382975429296494, + "learning_rate": 1.8643003437753557e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4676224, + "step": 9510 + }, + { + "epoch": 1.255774053055299, + "grad_norm": 18.239227294921875, + "learning_rate": 1.8640685319356181e-06, + "loss": 0.1122, + "num_input_tokens_seen": 4678592, + "step": 9515 + }, + { + "epoch": 1.2564339448330473, + "grad_norm": 0.4076708257198334, + "learning_rate": 1.8638365367035922e-06, + "loss": 0.001, + "num_input_tokens_seen": 4680960, + "step": 9520 + }, + { + "epoch": 1.2570938366107958, + "grad_norm": 22.06498146057129, + "learning_rate": 1.863604358128516e-06, + "loss": 0.0433, + "num_input_tokens_seen": 4683264, + "step": 9525 + }, + { + "epoch": 1.2577537283885443, + "grad_norm": 0.36363160610198975, + "learning_rate": 1.8633719962596693e-06, + "loss": 0.0433, + "num_input_tokens_seen": 4685760, + "step": 9530 + }, + { + "epoch": 1.2584136201662928, + "grad_norm": 36.61283493041992, + "learning_rate": 1.863139451146368e-06, + "loss": 0.0812, + "num_input_tokens_seen": 4688256, + "step": 9535 + }, + { + "epoch": 1.2590735119440413, + "grad_norm": 0.03277817741036415, + "learning_rate": 1.8629067228379687e-06, + "loss": 0.0911, + "num_input_tokens_seen": 4690624, + "step": 9540 + }, + { + "epoch": 1.2597334037217895, + "grad_norm": 0.06794434040784836, + "learning_rate": 1.8626738113838657e-06, + "loss": 0.0416, + "num_input_tokens_seen": 4693376, + "step": 9545 + }, + { + "epoch": 1.260393295499538, + "grad_norm": 0.5603092312812805, + "learning_rate": 1.8624407168334938e-06, + "loss": 0.0004, + "num_input_tokens_seen": 4695936, + "step": 9550 + }, + { + "epoch": 1.2610531872772865, + "grad_norm": 18.329586029052734, + "learning_rate": 1.8622074392363249e-06, + "loss": 0.0927, + "num_input_tokens_seen": 4698304, + "step": 9555 + }, + { + "epoch": 1.261713079055035, + "grad_norm": 8.598894119262695, + "learning_rate": 1.8619739786418707e-06, + "loss": 0.0017, + "num_input_tokens_seen": 4700608, + "step": 9560 + }, + { + "epoch": 1.2623729708327835, + "grad_norm": 0.1886700987815857, + "learning_rate": 1.8617403350996814e-06, + "loss": 0.0491, + "num_input_tokens_seen": 4702976, + "step": 9565 + }, + { + "epoch": 1.2630328626105318, + "grad_norm": 19.052263259887695, + "learning_rate": 1.861506508659346e-06, + "loss": 0.1877, + "num_input_tokens_seen": 4705408, + "step": 9570 + }, + { + "epoch": 1.2636927543882803, + "grad_norm": 0.01895013637840748, + "learning_rate": 1.861272499370493e-06, + "loss": 0.1504, + "num_input_tokens_seen": 4708032, + "step": 9575 + }, + { + "epoch": 1.2643526461660288, + "grad_norm": 19.410493850708008, + "learning_rate": 1.8610383072827887e-06, + "loss": 0.073, + "num_input_tokens_seen": 4710400, + "step": 9580 + }, + { + "epoch": 1.265012537943777, + "grad_norm": 15.133025169372559, + "learning_rate": 1.8608039324459388e-06, + "loss": 0.0615, + "num_input_tokens_seen": 4712960, + "step": 9585 + }, + { + "epoch": 1.2656724297215256, + "grad_norm": 0.11856388300657272, + "learning_rate": 1.8605693749096877e-06, + "loss": 0.0543, + "num_input_tokens_seen": 4715200, + "step": 9590 + }, + { + "epoch": 1.266332321499274, + "grad_norm": 17.171472549438477, + "learning_rate": 1.8603346347238185e-06, + "loss": 0.1053, + "num_input_tokens_seen": 4717568, + "step": 9595 + }, + { + "epoch": 1.2669922132770226, + "grad_norm": 0.1262669414281845, + "learning_rate": 1.8600997119381533e-06, + "loss": 0.1185, + "num_input_tokens_seen": 4719936, + "step": 9600 + }, + { + "epoch": 1.267652105054771, + "grad_norm": 0.5233826637268066, + "learning_rate": 1.8598646066025523e-06, + "loss": 0.092, + "num_input_tokens_seen": 4722368, + "step": 9605 + }, + { + "epoch": 1.2683119968325194, + "grad_norm": 0.26132288575172424, + "learning_rate": 1.8596293187669155e-06, + "loss": 0.0026, + "num_input_tokens_seen": 4724864, + "step": 9610 + }, + { + "epoch": 1.2689718886102679, + "grad_norm": 0.10552530735731125, + "learning_rate": 1.8593938484811806e-06, + "loss": 0.0039, + "num_input_tokens_seen": 4727424, + "step": 9615 + }, + { + "epoch": 1.2696317803880164, + "grad_norm": 21.18337059020996, + "learning_rate": 1.8591581957953245e-06, + "loss": 0.0911, + "num_input_tokens_seen": 4729600, + "step": 9620 + }, + { + "epoch": 1.2702916721657649, + "grad_norm": 0.06999889761209488, + "learning_rate": 1.8589223607593628e-06, + "loss": 0.0008, + "num_input_tokens_seen": 4732352, + "step": 9625 + }, + { + "epoch": 1.2709515639435134, + "grad_norm": 0.03068052977323532, + "learning_rate": 1.8586863434233502e-06, + "loss": 0.0029, + "num_input_tokens_seen": 4734848, + "step": 9630 + }, + { + "epoch": 1.2716114557212617, + "grad_norm": 1.2916810512542725, + "learning_rate": 1.8584501438373793e-06, + "loss": 0.0696, + "num_input_tokens_seen": 4737216, + "step": 9635 + }, + { + "epoch": 1.2722713474990102, + "grad_norm": 17.11394691467285, + "learning_rate": 1.8582137620515816e-06, + "loss": 0.0958, + "num_input_tokens_seen": 4739712, + "step": 9640 + }, + { + "epoch": 1.2729312392767587, + "grad_norm": 0.03202284872531891, + "learning_rate": 1.8579771981161277e-06, + "loss": 0.2084, + "num_input_tokens_seen": 4742144, + "step": 9645 + }, + { + "epoch": 1.273591131054507, + "grad_norm": 0.006628343369811773, + "learning_rate": 1.8577404520812262e-06, + "loss": 0.0001, + "num_input_tokens_seen": 4744832, + "step": 9650 + }, + { + "epoch": 1.2742510228322554, + "grad_norm": 65.34660339355469, + "learning_rate": 1.8575035239971255e-06, + "loss": 0.0457, + "num_input_tokens_seen": 4747584, + "step": 9655 + }, + { + "epoch": 1.274910914610004, + "grad_norm": 0.11509735137224197, + "learning_rate": 1.857266413914111e-06, + "loss": 0.0005, + "num_input_tokens_seen": 4750016, + "step": 9660 + }, + { + "epoch": 1.2755708063877524, + "grad_norm": 0.03837760165333748, + "learning_rate": 1.8570291218825082e-06, + "loss": 0.0567, + "num_input_tokens_seen": 4752576, + "step": 9665 + }, + { + "epoch": 1.276230698165501, + "grad_norm": 0.053258758038282394, + "learning_rate": 1.8567916479526802e-06, + "loss": 0.0774, + "num_input_tokens_seen": 4754752, + "step": 9670 + }, + { + "epoch": 1.2768905899432492, + "grad_norm": 2.510385036468506, + "learning_rate": 1.8565539921750295e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4756864, + "step": 9675 + }, + { + "epoch": 1.2775504817209977, + "grad_norm": 88.27272033691406, + "learning_rate": 1.8563161545999965e-06, + "loss": 0.0764, + "num_input_tokens_seen": 4759424, + "step": 9680 + }, + { + "epoch": 1.2782103734987462, + "grad_norm": 24.547399520874023, + "learning_rate": 1.8560781352780607e-06, + "loss": 0.2287, + "num_input_tokens_seen": 4761792, + "step": 9685 + }, + { + "epoch": 1.2788702652764947, + "grad_norm": 0.13503701984882355, + "learning_rate": 1.8558399342597402e-06, + "loss": 0.0725, + "num_input_tokens_seen": 4764544, + "step": 9690 + }, + { + "epoch": 1.2795301570542432, + "grad_norm": 0.033040594309568405, + "learning_rate": 1.8556015515955907e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4766912, + "step": 9695 + }, + { + "epoch": 1.2801900488319915, + "grad_norm": 0.05509025976061821, + "learning_rate": 1.8553629873362079e-06, + "loss": 0.063, + "num_input_tokens_seen": 4769280, + "step": 9700 + }, + { + "epoch": 1.28084994060974, + "grad_norm": 0.16460861265659332, + "learning_rate": 1.855124241532225e-06, + "loss": 0.0511, + "num_input_tokens_seen": 4772032, + "step": 9705 + }, + { + "epoch": 1.2815098323874885, + "grad_norm": 0.01625911518931389, + "learning_rate": 1.8548853142343142e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4774400, + "step": 9710 + }, + { + "epoch": 1.2821697241652368, + "grad_norm": 0.3575328290462494, + "learning_rate": 1.854646205493186e-06, + "loss": 0.2591, + "num_input_tokens_seen": 4776640, + "step": 9715 + }, + { + "epoch": 1.2828296159429853, + "grad_norm": 39.71146774291992, + "learning_rate": 1.8544069153595896e-06, + "loss": 0.0848, + "num_input_tokens_seen": 4779008, + "step": 9720 + }, + { + "epoch": 1.2834895077207338, + "grad_norm": 0.4594075679779053, + "learning_rate": 1.8541674438843125e-06, + "loss": 0.2256, + "num_input_tokens_seen": 4781696, + "step": 9725 + }, + { + "epoch": 1.2841493994984823, + "grad_norm": 0.4085526764392853, + "learning_rate": 1.8539277911181809e-06, + "loss": 0.0343, + "num_input_tokens_seen": 4784192, + "step": 9730 + }, + { + "epoch": 1.2848092912762308, + "grad_norm": 0.5087748765945435, + "learning_rate": 1.8536879571120593e-06, + "loss": 0.0027, + "num_input_tokens_seen": 4786880, + "step": 9735 + }, + { + "epoch": 1.285469183053979, + "grad_norm": 0.07352367043495178, + "learning_rate": 1.8534479419168508e-06, + "loss": 0.1961, + "num_input_tokens_seen": 4789696, + "step": 9740 + }, + { + "epoch": 1.2861290748317276, + "grad_norm": 0.043694186955690384, + "learning_rate": 1.8532077455834964e-06, + "loss": 0.0241, + "num_input_tokens_seen": 4792384, + "step": 9745 + }, + { + "epoch": 1.286788966609476, + "grad_norm": 37.857357025146484, + "learning_rate": 1.8529673681629766e-06, + "loss": 0.1954, + "num_input_tokens_seen": 4794944, + "step": 9750 + }, + { + "epoch": 1.2874488583872246, + "grad_norm": 1.7321579456329346, + "learning_rate": 1.85272680970631e-06, + "loss": 0.0025, + "num_input_tokens_seen": 4797376, + "step": 9755 + }, + { + "epoch": 1.288108750164973, + "grad_norm": 0.08831868320703506, + "learning_rate": 1.8524860702645527e-06, + "loss": 0.0051, + "num_input_tokens_seen": 4799808, + "step": 9760 + }, + { + "epoch": 1.2887686419427213, + "grad_norm": 0.02845185063779354, + "learning_rate": 1.8522451498888004e-06, + "loss": 0.0732, + "num_input_tokens_seen": 4802560, + "step": 9765 + }, + { + "epoch": 1.2894285337204698, + "grad_norm": 0.15058395266532898, + "learning_rate": 1.8520040486301862e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4804736, + "step": 9770 + }, + { + "epoch": 1.2900884254982183, + "grad_norm": 19.97967529296875, + "learning_rate": 1.8517627665398825e-06, + "loss": 0.1509, + "num_input_tokens_seen": 4807040, + "step": 9775 + }, + { + "epoch": 1.2907483172759666, + "grad_norm": 0.08964036405086517, + "learning_rate": 1.8515213036690996e-06, + "loss": 0.0015, + "num_input_tokens_seen": 4809152, + "step": 9780 + }, + { + "epoch": 1.2914082090537151, + "grad_norm": 0.08650282025337219, + "learning_rate": 1.8512796600690864e-06, + "loss": 0.0002, + "num_input_tokens_seen": 4811776, + "step": 9785 + }, + { + "epoch": 1.2920681008314636, + "grad_norm": 0.010320809669792652, + "learning_rate": 1.8510378357911297e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4814272, + "step": 9790 + }, + { + "epoch": 1.2927279926092121, + "grad_norm": 0.026830295100808144, + "learning_rate": 1.8507958308865551e-06, + "loss": 0.0535, + "num_input_tokens_seen": 4816576, + "step": 9795 + }, + { + "epoch": 1.2933878843869606, + "grad_norm": 0.5749787092208862, + "learning_rate": 1.8505536454067264e-06, + "loss": 0.0654, + "num_input_tokens_seen": 4819200, + "step": 9800 + }, + { + "epoch": 1.294047776164709, + "grad_norm": 0.1049313172698021, + "learning_rate": 1.8503112794030456e-06, + "loss": 0.134, + "num_input_tokens_seen": 4821824, + "step": 9805 + }, + { + "epoch": 1.2947076679424574, + "grad_norm": 0.16344867646694183, + "learning_rate": 1.8500687329269532e-06, + "loss": 0.0005, + "num_input_tokens_seen": 4824576, + "step": 9810 + }, + { + "epoch": 1.295367559720206, + "grad_norm": 0.026725659146904945, + "learning_rate": 1.8498260060299282e-06, + "loss": 0.0988, + "num_input_tokens_seen": 4827136, + "step": 9815 + }, + { + "epoch": 1.2960274514979544, + "grad_norm": 17.736068725585938, + "learning_rate": 1.849583098763487e-06, + "loss": 0.0933, + "num_input_tokens_seen": 4829312, + "step": 9820 + }, + { + "epoch": 1.296687343275703, + "grad_norm": 0.22278468310832977, + "learning_rate": 1.8493400111791858e-06, + "loss": 0.0507, + "num_input_tokens_seen": 4831808, + "step": 9825 + }, + { + "epoch": 1.2973472350534512, + "grad_norm": 0.05429788678884506, + "learning_rate": 1.8490967433286172e-06, + "loss": 0.1303, + "num_input_tokens_seen": 4834048, + "step": 9830 + }, + { + "epoch": 1.2980071268311997, + "grad_norm": 0.018094699829816818, + "learning_rate": 1.8488532952634138e-06, + "loss": 0.0016, + "num_input_tokens_seen": 4836416, + "step": 9835 + }, + { + "epoch": 1.2986670186089482, + "grad_norm": 3.234799385070801, + "learning_rate": 1.8486096670352448e-06, + "loss": 0.0792, + "num_input_tokens_seen": 4838656, + "step": 9840 + }, + { + "epoch": 1.2993269103866965, + "grad_norm": 118.24174499511719, + "learning_rate": 1.8483658586958198e-06, + "loss": 0.1515, + "num_input_tokens_seen": 4841024, + "step": 9845 + }, + { + "epoch": 1.299986802164445, + "grad_norm": 372.8785705566406, + "learning_rate": 1.8481218702968845e-06, + "loss": 0.0899, + "num_input_tokens_seen": 4843520, + "step": 9850 + }, + { + "epoch": 1.3006466939421935, + "grad_norm": 0.0978814959526062, + "learning_rate": 1.8478777018902236e-06, + "loss": 0.0714, + "num_input_tokens_seen": 4846208, + "step": 9855 + }, + { + "epoch": 1.301306585719942, + "grad_norm": 0.22887535393238068, + "learning_rate": 1.8476333535276605e-06, + "loss": 0.1439, + "num_input_tokens_seen": 4848768, + "step": 9860 + }, + { + "epoch": 1.3019664774976905, + "grad_norm": 0.10580222308635712, + "learning_rate": 1.8473888252610563e-06, + "loss": 0.0974, + "num_input_tokens_seen": 4851264, + "step": 9865 + }, + { + "epoch": 1.3026263692754387, + "grad_norm": 0.34106671810150146, + "learning_rate": 1.8471441171423101e-06, + "loss": 0.1057, + "num_input_tokens_seen": 4853632, + "step": 9870 + }, + { + "epoch": 1.3032862610531872, + "grad_norm": 0.170933797955513, + "learning_rate": 1.8468992292233595e-06, + "loss": 0.0011, + "num_input_tokens_seen": 4856256, + "step": 9875 + }, + { + "epoch": 1.3039461528309357, + "grad_norm": 0.6921259760856628, + "learning_rate": 1.8466541615561804e-06, + "loss": 0.076, + "num_input_tokens_seen": 4858752, + "step": 9880 + }, + { + "epoch": 1.3046060446086842, + "grad_norm": 0.012155439704656601, + "learning_rate": 1.8464089141927866e-06, + "loss": 0.0014, + "num_input_tokens_seen": 4861248, + "step": 9885 + }, + { + "epoch": 1.3052659363864327, + "grad_norm": 212.3350830078125, + "learning_rate": 1.8461634871852298e-06, + "loss": 0.2671, + "num_input_tokens_seen": 4863744, + "step": 9890 + }, + { + "epoch": 1.305925828164181, + "grad_norm": 0.13975022733211517, + "learning_rate": 1.8459178805856003e-06, + "loss": 0.0681, + "num_input_tokens_seen": 4865984, + "step": 9895 + }, + { + "epoch": 1.3065857199419295, + "grad_norm": 0.8244561553001404, + "learning_rate": 1.8456720944460265e-06, + "loss": 0.1544, + "num_input_tokens_seen": 4868480, + "step": 9900 + }, + { + "epoch": 1.307245611719678, + "grad_norm": 19.768590927124023, + "learning_rate": 1.8454261288186741e-06, + "loss": 0.1641, + "num_input_tokens_seen": 4870976, + "step": 9905 + }, + { + "epoch": 1.3079055034974263, + "grad_norm": 0.03448856994509697, + "learning_rate": 1.8451799837557483e-06, + "loss": 0.0584, + "num_input_tokens_seen": 4873472, + "step": 9910 + }, + { + "epoch": 1.3085653952751748, + "grad_norm": 0.015968943014740944, + "learning_rate": 1.8449336593094914e-06, + "loss": 0.0582, + "num_input_tokens_seen": 4876160, + "step": 9915 + }, + { + "epoch": 1.3092252870529233, + "grad_norm": 0.04296119883656502, + "learning_rate": 1.8446871555321834e-06, + "loss": 0.0677, + "num_input_tokens_seen": 4878400, + "step": 9920 + }, + { + "epoch": 1.3098851788306718, + "grad_norm": 2.142721652984619, + "learning_rate": 1.8444404724761436e-06, + "loss": 0.0026, + "num_input_tokens_seen": 4881152, + "step": 9925 + }, + { + "epoch": 1.3105450706084203, + "grad_norm": 0.272866427898407, + "learning_rate": 1.8441936101937285e-06, + "loss": 0.0665, + "num_input_tokens_seen": 4883648, + "step": 9930 + }, + { + "epoch": 1.3112049623861686, + "grad_norm": 0.026395170018076897, + "learning_rate": 1.8439465687373328e-06, + "loss": 0.1721, + "num_input_tokens_seen": 4885760, + "step": 9935 + }, + { + "epoch": 1.311864854163917, + "grad_norm": 0.05674809217453003, + "learning_rate": 1.8436993481593891e-06, + "loss": 0.0015, + "num_input_tokens_seen": 4888384, + "step": 9940 + }, + { + "epoch": 1.3125247459416656, + "grad_norm": 33.2212028503418, + "learning_rate": 1.8434519485123685e-06, + "loss": 0.084, + "num_input_tokens_seen": 4890880, + "step": 9945 + }, + { + "epoch": 1.313184637719414, + "grad_norm": 0.12552928924560547, + "learning_rate": 1.8432043698487796e-06, + "loss": 0.0374, + "num_input_tokens_seen": 4893184, + "step": 9950 + }, + { + "epoch": 1.3138445294971626, + "grad_norm": 0.028963031247258186, + "learning_rate": 1.8429566122211693e-06, + "loss": 0.0611, + "num_input_tokens_seen": 4895552, + "step": 9955 + }, + { + "epoch": 1.3145044212749109, + "grad_norm": 0.035612285137176514, + "learning_rate": 1.8427086756821222e-06, + "loss": 0.1239, + "num_input_tokens_seen": 4897856, + "step": 9960 + }, + { + "epoch": 1.3151643130526594, + "grad_norm": 0.13290604948997498, + "learning_rate": 1.842460560284261e-06, + "loss": 0.1224, + "num_input_tokens_seen": 4900352, + "step": 9965 + }, + { + "epoch": 1.3158242048304079, + "grad_norm": 0.07261032611131668, + "learning_rate": 1.8422122660802466e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4903040, + "step": 9970 + }, + { + "epoch": 1.3164840966081561, + "grad_norm": 0.05626000091433525, + "learning_rate": 1.8419637931227776e-06, + "loss": 0.0633, + "num_input_tokens_seen": 4905664, + "step": 9975 + }, + { + "epoch": 1.3171439883859046, + "grad_norm": 1.0715250968933105, + "learning_rate": 1.8417151414645904e-06, + "loss": 0.0512, + "num_input_tokens_seen": 4907840, + "step": 9980 + }, + { + "epoch": 1.3178038801636531, + "grad_norm": 0.10115410387516022, + "learning_rate": 1.84146631115846e-06, + "loss": 0.0012, + "num_input_tokens_seen": 4910144, + "step": 9985 + }, + { + "epoch": 1.3184637719414016, + "grad_norm": 23.29815101623535, + "learning_rate": 1.8412173022571979e-06, + "loss": 0.1102, + "num_input_tokens_seen": 4912640, + "step": 9990 + }, + { + "epoch": 1.3191236637191501, + "grad_norm": 0.16725671291351318, + "learning_rate": 1.8409681148136556e-06, + "loss": 0.0006, + "num_input_tokens_seen": 4914944, + "step": 9995 + }, + { + "epoch": 1.3197835554968984, + "grad_norm": 0.17661815881729126, + "learning_rate": 1.8407187488807203e-06, + "loss": 0.0516, + "num_input_tokens_seen": 4917568, + "step": 10000 + }, + { + "epoch": 1.320443447274647, + "grad_norm": 0.01880503073334694, + "learning_rate": 1.8404692045113185e-06, + "loss": 0.0525, + "num_input_tokens_seen": 4919680, + "step": 10005 + }, + { + "epoch": 1.3211033390523954, + "grad_norm": 229.0442657470703, + "learning_rate": 1.8402194817584147e-06, + "loss": 0.0183, + "num_input_tokens_seen": 4921856, + "step": 10010 + }, + { + "epoch": 1.321763230830144, + "grad_norm": 0.14362648129463196, + "learning_rate": 1.8399695806750098e-06, + "loss": 0.0421, + "num_input_tokens_seen": 4924288, + "step": 10015 + }, + { + "epoch": 1.3224231226078924, + "grad_norm": 0.1044907420873642, + "learning_rate": 1.8397195013141445e-06, + "loss": 0.1288, + "num_input_tokens_seen": 4926528, + "step": 10020 + }, + { + "epoch": 1.3230830143856407, + "grad_norm": 1.369716763496399, + "learning_rate": 1.8394692437288954e-06, + "loss": 0.004, + "num_input_tokens_seen": 4929344, + "step": 10025 + }, + { + "epoch": 1.3237429061633892, + "grad_norm": 26.978471755981445, + "learning_rate": 1.8392188079723784e-06, + "loss": 0.0934, + "num_input_tokens_seen": 4931776, + "step": 10030 + }, + { + "epoch": 1.3244027979411377, + "grad_norm": 0.09075580537319183, + "learning_rate": 1.8389681940977467e-06, + "loss": 0.0003, + "num_input_tokens_seen": 4934272, + "step": 10035 + }, + { + "epoch": 1.325062689718886, + "grad_norm": 0.08495357632637024, + "learning_rate": 1.838717402158191e-06, + "loss": 0.4409, + "num_input_tokens_seen": 4936960, + "step": 10040 + }, + { + "epoch": 1.3257225814966347, + "grad_norm": 0.0169760100543499, + "learning_rate": 1.83846643220694e-06, + "loss": 0.2134, + "num_input_tokens_seen": 4939776, + "step": 10045 + }, + { + "epoch": 1.326382473274383, + "grad_norm": 0.16505898535251617, + "learning_rate": 1.8382152842972607e-06, + "loss": 0.0947, + "num_input_tokens_seen": 4942208, + "step": 10050 + }, + { + "epoch": 1.3270423650521315, + "grad_norm": 0.145982563495636, + "learning_rate": 1.8379639584824572e-06, + "loss": 0.0015, + "num_input_tokens_seen": 4944448, + "step": 10055 + }, + { + "epoch": 1.32770225682988, + "grad_norm": 25.01861572265625, + "learning_rate": 1.8377124548158713e-06, + "loss": 0.177, + "num_input_tokens_seen": 4946816, + "step": 10060 + }, + { + "epoch": 1.3283621486076282, + "grad_norm": 0.07205390930175781, + "learning_rate": 1.8374607733508833e-06, + "loss": 0.0229, + "num_input_tokens_seen": 4949184, + "step": 10065 + }, + { + "epoch": 1.3290220403853767, + "grad_norm": 0.14163658022880554, + "learning_rate": 1.8372089141409108e-06, + "loss": 0.1654, + "num_input_tokens_seen": 4951616, + "step": 10070 + }, + { + "epoch": 1.3296819321631252, + "grad_norm": 0.1708272397518158, + "learning_rate": 1.8369568772394087e-06, + "loss": 0.1656, + "num_input_tokens_seen": 4954048, + "step": 10075 + }, + { + "epoch": 1.3303418239408737, + "grad_norm": 0.29565057158470154, + "learning_rate": 1.8367046626998702e-06, + "loss": 0.1187, + "num_input_tokens_seen": 4956160, + "step": 10080 + }, + { + "epoch": 1.3310017157186222, + "grad_norm": 12.916511535644531, + "learning_rate": 1.8364522705758257e-06, + "loss": 0.1228, + "num_input_tokens_seen": 4958528, + "step": 10085 + }, + { + "epoch": 1.3316616074963705, + "grad_norm": 69.16001892089844, + "learning_rate": 1.836199700920844e-06, + "loss": 0.1143, + "num_input_tokens_seen": 4960896, + "step": 10090 + }, + { + "epoch": 1.332321499274119, + "grad_norm": 0.2891543209552765, + "learning_rate": 1.8359469537885312e-06, + "loss": 0.0022, + "num_input_tokens_seen": 4963456, + "step": 10095 + }, + { + "epoch": 1.3329813910518675, + "grad_norm": 0.31188878417015076, + "learning_rate": 1.835694029232531e-06, + "loss": 0.0887, + "num_input_tokens_seen": 4965632, + "step": 10100 + }, + { + "epoch": 1.333641282829616, + "grad_norm": 0.077885203063488, + "learning_rate": 1.8354409273065247e-06, + "loss": 0.1001, + "num_input_tokens_seen": 4967936, + "step": 10105 + }, + { + "epoch": 1.3343011746073645, + "grad_norm": 2.893744468688965, + "learning_rate": 1.835187648064231e-06, + "loss": 0.0025, + "num_input_tokens_seen": 4970240, + "step": 10110 + }, + { + "epoch": 1.3349610663851128, + "grad_norm": 0.04158975929021835, + "learning_rate": 1.8349341915594073e-06, + "loss": 0.001, + "num_input_tokens_seen": 4972992, + "step": 10115 + }, + { + "epoch": 1.3356209581628613, + "grad_norm": 124.41069793701172, + "learning_rate": 1.8346805578458474e-06, + "loss": 0.1337, + "num_input_tokens_seen": 4975616, + "step": 10120 + }, + { + "epoch": 1.3362808499406098, + "grad_norm": 0.10024034976959229, + "learning_rate": 1.8344267469773835e-06, + "loss": 0.0462, + "num_input_tokens_seen": 4978112, + "step": 10125 + }, + { + "epoch": 1.336940741718358, + "grad_norm": 0.6105455756187439, + "learning_rate": 1.8341727590078847e-06, + "loss": 0.0005, + "num_input_tokens_seen": 4980352, + "step": 10130 + }, + { + "epoch": 1.3376006334961066, + "grad_norm": 339.4839782714844, + "learning_rate": 1.8339185939912589e-06, + "loss": 0.0783, + "num_input_tokens_seen": 4982656, + "step": 10135 + }, + { + "epoch": 1.338260525273855, + "grad_norm": 0.013025536201894283, + "learning_rate": 1.83366425198145e-06, + "loss": 0.0077, + "num_input_tokens_seen": 4985216, + "step": 10140 + }, + { + "epoch": 1.3389204170516036, + "grad_norm": 0.046981412917375565, + "learning_rate": 1.8334097330324405e-06, + "loss": 0.0159, + "num_input_tokens_seen": 4987904, + "step": 10145 + }, + { + "epoch": 1.339580308829352, + "grad_norm": 0.009956144727766514, + "learning_rate": 1.8331550371982503e-06, + "loss": 0.0203, + "num_input_tokens_seen": 4990400, + "step": 10150 + }, + { + "epoch": 1.3402402006071004, + "grad_norm": 0.6339499354362488, + "learning_rate": 1.8329001645329364e-06, + "loss": 0.071, + "num_input_tokens_seen": 4992960, + "step": 10155 + }, + { + "epoch": 1.3409000923848489, + "grad_norm": 2.1077048778533936, + "learning_rate": 1.8326451150905945e-06, + "loss": 0.0032, + "num_input_tokens_seen": 4995584, + "step": 10160 + }, + { + "epoch": 1.3415599841625974, + "grad_norm": 0.00569473672658205, + "learning_rate": 1.8323898889253562e-06, + "loss": 0.1142, + "num_input_tokens_seen": 4997952, + "step": 10165 + }, + { + "epoch": 1.3422198759403459, + "grad_norm": 0.04009333997964859, + "learning_rate": 1.8321344860913918e-06, + "loss": 0.1238, + "num_input_tokens_seen": 5000000, + "step": 10170 + }, + { + "epoch": 1.3428797677180944, + "grad_norm": 0.04034736752510071, + "learning_rate": 1.8318789066429083e-06, + "loss": 0.0662, + "num_input_tokens_seen": 5002688, + "step": 10175 + }, + { + "epoch": 1.3435396594958426, + "grad_norm": 0.04818149283528328, + "learning_rate": 1.831623150634151e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5005184, + "step": 10180 + }, + { + "epoch": 1.3441995512735911, + "grad_norm": 0.040782514959573746, + "learning_rate": 1.8313672181194023e-06, + "loss": 0.1373, + "num_input_tokens_seen": 5007424, + "step": 10185 + }, + { + "epoch": 1.3448594430513396, + "grad_norm": 22.4898681640625, + "learning_rate": 1.8311111091529817e-06, + "loss": 0.0557, + "num_input_tokens_seen": 5010112, + "step": 10190 + }, + { + "epoch": 1.345519334829088, + "grad_norm": 0.39273348450660706, + "learning_rate": 1.8308548237892465e-06, + "loss": 0.0594, + "num_input_tokens_seen": 5012736, + "step": 10195 + }, + { + "epoch": 1.3461792266068364, + "grad_norm": 19.32087516784668, + "learning_rate": 1.8305983620825915e-06, + "loss": 0.0539, + "num_input_tokens_seen": 5015040, + "step": 10200 + }, + { + "epoch": 1.346839118384585, + "grad_norm": 0.05848585441708565, + "learning_rate": 1.8303417240874492e-06, + "loss": 0.0573, + "num_input_tokens_seen": 5017344, + "step": 10205 + }, + { + "epoch": 1.3474990101623334, + "grad_norm": 0.03271415829658508, + "learning_rate": 1.8300849098582886e-06, + "loss": 0.0528, + "num_input_tokens_seen": 5019776, + "step": 10210 + }, + { + "epoch": 1.348158901940082, + "grad_norm": 0.1817331165075302, + "learning_rate": 1.829827919449617e-06, + "loss": 0.0395, + "num_input_tokens_seen": 5022272, + "step": 10215 + }, + { + "epoch": 1.3488187937178302, + "grad_norm": 31.19110679626465, + "learning_rate": 1.8295707529159783e-06, + "loss": 0.1797, + "num_input_tokens_seen": 5024768, + "step": 10220 + }, + { + "epoch": 1.3494786854955787, + "grad_norm": 0.15519972145557404, + "learning_rate": 1.829313410311955e-06, + "loss": 0.1089, + "num_input_tokens_seen": 5027072, + "step": 10225 + }, + { + "epoch": 1.3501385772733272, + "grad_norm": 57.0513801574707, + "learning_rate": 1.8290558916921656e-06, + "loss": 0.1722, + "num_input_tokens_seen": 5029568, + "step": 10230 + }, + { + "epoch": 1.3507984690510757, + "grad_norm": 0.490141361951828, + "learning_rate": 1.8287981971112668e-06, + "loss": 0.0379, + "num_input_tokens_seen": 5032256, + "step": 10235 + }, + { + "epoch": 1.3514583608288242, + "grad_norm": 0.43738701939582825, + "learning_rate": 1.8285403266239521e-06, + "loss": 0.0258, + "num_input_tokens_seen": 5034944, + "step": 10240 + }, + { + "epoch": 1.3521182526065725, + "grad_norm": 18.965534210205078, + "learning_rate": 1.8282822802849531e-06, + "loss": 0.2289, + "num_input_tokens_seen": 5037440, + "step": 10245 + }, + { + "epoch": 1.352778144384321, + "grad_norm": 0.20755289494991302, + "learning_rate": 1.8280240581490381e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5040128, + "step": 10250 + }, + { + "epoch": 1.3534380361620695, + "grad_norm": 0.1373784840106964, + "learning_rate": 1.8277656602710127e-06, + "loss": 0.0257, + "num_input_tokens_seen": 5042624, + "step": 10255 + }, + { + "epoch": 1.3540979279398178, + "grad_norm": 0.5778612494468689, + "learning_rate": 1.8275070867057203e-06, + "loss": 0.0817, + "num_input_tokens_seen": 5044928, + "step": 10260 + }, + { + "epoch": 1.3547578197175663, + "grad_norm": 0.06566134095191956, + "learning_rate": 1.827248337508041e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5047488, + "step": 10265 + }, + { + "epoch": 1.3554177114953148, + "grad_norm": 0.06912878155708313, + "learning_rate": 1.8269894127328925e-06, + "loss": 0.0283, + "num_input_tokens_seen": 5050368, + "step": 10270 + }, + { + "epoch": 1.3560776032730633, + "grad_norm": 0.05937294289469719, + "learning_rate": 1.8267303124352295e-06, + "loss": 0.0452, + "num_input_tokens_seen": 5052736, + "step": 10275 + }, + { + "epoch": 1.3567374950508118, + "grad_norm": 16.943443298339844, + "learning_rate": 1.826471036670045e-06, + "loss": 0.1482, + "num_input_tokens_seen": 5055168, + "step": 10280 + }, + { + "epoch": 1.35739738682856, + "grad_norm": 0.060737937688827515, + "learning_rate": 1.8262115854923673e-06, + "loss": 0.0281, + "num_input_tokens_seen": 5057664, + "step": 10285 + }, + { + "epoch": 1.3580572786063085, + "grad_norm": 0.23668773472309113, + "learning_rate": 1.8259519589572637e-06, + "loss": 0.1029, + "num_input_tokens_seen": 5060160, + "step": 10290 + }, + { + "epoch": 1.358717170384057, + "grad_norm": 0.045837774872779846, + "learning_rate": 1.8256921571198376e-06, + "loss": 0.0132, + "num_input_tokens_seen": 5062912, + "step": 10295 + }, + { + "epoch": 1.3593770621618055, + "grad_norm": 0.024858810007572174, + "learning_rate": 1.8254321800352308e-06, + "loss": 0.0083, + "num_input_tokens_seen": 5065216, + "step": 10300 + }, + { + "epoch": 1.360036953939554, + "grad_norm": 116.51515197753906, + "learning_rate": 1.8251720277586209e-06, + "loss": 0.0474, + "num_input_tokens_seen": 5067456, + "step": 10305 + }, + { + "epoch": 1.3606968457173023, + "grad_norm": 71.86284637451172, + "learning_rate": 1.8249117003452233e-06, + "loss": 0.2756, + "num_input_tokens_seen": 5069760, + "step": 10310 + }, + { + "epoch": 1.3613567374950508, + "grad_norm": 0.07100139558315277, + "learning_rate": 1.8246511978502912e-06, + "loss": 0.1271, + "num_input_tokens_seen": 5072320, + "step": 10315 + }, + { + "epoch": 1.3620166292727993, + "grad_norm": 0.04098708555102348, + "learning_rate": 1.8243905203291136e-06, + "loss": 0.0017, + "num_input_tokens_seen": 5074816, + "step": 10320 + }, + { + "epoch": 1.3626765210505476, + "grad_norm": 0.02675493434071541, + "learning_rate": 1.8241296678370184e-06, + "loss": 0.0858, + "num_input_tokens_seen": 5077312, + "step": 10325 + }, + { + "epoch": 1.363336412828296, + "grad_norm": 0.03876377269625664, + "learning_rate": 1.8238686404293686e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5079616, + "step": 10330 + }, + { + "epoch": 1.3639963046060446, + "grad_norm": 0.04434245079755783, + "learning_rate": 1.8236074381615661e-06, + "loss": 0.3048, + "num_input_tokens_seen": 5081664, + "step": 10335 + }, + { + "epoch": 1.364656196383793, + "grad_norm": 0.2147568315267563, + "learning_rate": 1.823346061089049e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5084224, + "step": 10340 + }, + { + "epoch": 1.3653160881615416, + "grad_norm": 6.88644552230835, + "learning_rate": 1.8230845092672925e-06, + "loss": 0.0951, + "num_input_tokens_seen": 5086528, + "step": 10345 + }, + { + "epoch": 1.3659759799392899, + "grad_norm": 17.738170623779297, + "learning_rate": 1.8228227827518093e-06, + "loss": 0.167, + "num_input_tokens_seen": 5088960, + "step": 10350 + }, + { + "epoch": 1.3666358717170384, + "grad_norm": 85.15935516357422, + "learning_rate": 1.8225608815981488e-06, + "loss": 0.0787, + "num_input_tokens_seen": 5091392, + "step": 10355 + }, + { + "epoch": 1.3672957634947869, + "grad_norm": 0.06308019161224365, + "learning_rate": 1.8222988058618976e-06, + "loss": 0.0948, + "num_input_tokens_seen": 5093888, + "step": 10360 + }, + { + "epoch": 1.3679556552725354, + "grad_norm": 17.740474700927734, + "learning_rate": 1.8220365555986797e-06, + "loss": 0.15, + "num_input_tokens_seen": 5096256, + "step": 10365 + }, + { + "epoch": 1.3686155470502839, + "grad_norm": 0.6935084462165833, + "learning_rate": 1.8217741308641553e-06, + "loss": 0.0489, + "num_input_tokens_seen": 5098816, + "step": 10370 + }, + { + "epoch": 1.3692754388280322, + "grad_norm": 0.2454441636800766, + "learning_rate": 1.8215115317140226e-06, + "loss": 0.0487, + "num_input_tokens_seen": 5101248, + "step": 10375 + }, + { + "epoch": 1.3699353306057807, + "grad_norm": 1.8006916046142578, + "learning_rate": 1.8212487582040164e-06, + "loss": 0.0838, + "num_input_tokens_seen": 5103488, + "step": 10380 + }, + { + "epoch": 1.3705952223835292, + "grad_norm": 0.09306250512599945, + "learning_rate": 1.8209858103899081e-06, + "loss": 0.2107, + "num_input_tokens_seen": 5105920, + "step": 10385 + }, + { + "epoch": 1.3712551141612774, + "grad_norm": 0.3448830842971802, + "learning_rate": 1.8207226883275067e-06, + "loss": 0.001, + "num_input_tokens_seen": 5108352, + "step": 10390 + }, + { + "epoch": 1.371915005939026, + "grad_norm": 0.08386199176311493, + "learning_rate": 1.820459392072658e-06, + "loss": 0.1689, + "num_input_tokens_seen": 5110784, + "step": 10395 + }, + { + "epoch": 1.3725748977167744, + "grad_norm": 0.41205623745918274, + "learning_rate": 1.8201959216812443e-06, + "loss": 0.121, + "num_input_tokens_seen": 5113344, + "step": 10400 + }, + { + "epoch": 1.373234789494523, + "grad_norm": 0.1537424772977829, + "learning_rate": 1.8199322772091858e-06, + "loss": 0.0541, + "num_input_tokens_seen": 5115712, + "step": 10405 + }, + { + "epoch": 1.3738946812722714, + "grad_norm": 15.446474075317383, + "learning_rate": 1.819668458712439e-06, + "loss": 0.0519, + "num_input_tokens_seen": 5117952, + "step": 10410 + }, + { + "epoch": 1.3745545730500197, + "grad_norm": 0.19189509749412537, + "learning_rate": 1.8194044662469973e-06, + "loss": 0.0012, + "num_input_tokens_seen": 5120128, + "step": 10415 + }, + { + "epoch": 1.3752144648277682, + "grad_norm": 1.3778020143508911, + "learning_rate": 1.8191402998688913e-06, + "loss": 0.0045, + "num_input_tokens_seen": 5122432, + "step": 10420 + }, + { + "epoch": 1.3758743566055167, + "grad_norm": 0.09211282432079315, + "learning_rate": 1.8188759596341888e-06, + "loss": 0.0804, + "num_input_tokens_seen": 5125056, + "step": 10425 + }, + { + "epoch": 1.3765342483832652, + "grad_norm": 0.12644384801387787, + "learning_rate": 1.8186114455989933e-06, + "loss": 0.0818, + "num_input_tokens_seen": 5127424, + "step": 10430 + }, + { + "epoch": 1.3771941401610137, + "grad_norm": 0.07302141934633255, + "learning_rate": 1.8183467578194467e-06, + "loss": 0.0692, + "num_input_tokens_seen": 5129792, + "step": 10435 + }, + { + "epoch": 1.377854031938762, + "grad_norm": 0.0963432714343071, + "learning_rate": 1.8180818963517264e-06, + "loss": 0.1073, + "num_input_tokens_seen": 5132032, + "step": 10440 + }, + { + "epoch": 1.3785139237165105, + "grad_norm": 0.046357661485672, + "learning_rate": 1.8178168612520478e-06, + "loss": 0.0095, + "num_input_tokens_seen": 5134400, + "step": 10445 + }, + { + "epoch": 1.379173815494259, + "grad_norm": 0.05055604875087738, + "learning_rate": 1.8175516525766627e-06, + "loss": 0.0715, + "num_input_tokens_seen": 5136640, + "step": 10450 + }, + { + "epoch": 1.3798337072720073, + "grad_norm": 17.73884391784668, + "learning_rate": 1.8172862703818593e-06, + "loss": 0.1421, + "num_input_tokens_seen": 5139136, + "step": 10455 + }, + { + "epoch": 1.3804935990497558, + "grad_norm": 0.03119855374097824, + "learning_rate": 1.8170207147239636e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5141632, + "step": 10460 + }, + { + "epoch": 1.3811534908275043, + "grad_norm": 16.85359764099121, + "learning_rate": 1.8167549856593374e-06, + "loss": 0.0696, + "num_input_tokens_seen": 5144320, + "step": 10465 + }, + { + "epoch": 1.3818133826052528, + "grad_norm": 50.230125427246094, + "learning_rate": 1.81648908324438e-06, + "loss": 0.2092, + "num_input_tokens_seen": 5146880, + "step": 10470 + }, + { + "epoch": 1.3824732743830013, + "grad_norm": 0.6590256690979004, + "learning_rate": 1.8162230075355277e-06, + "loss": 0.0023, + "num_input_tokens_seen": 5149632, + "step": 10475 + }, + { + "epoch": 1.3831331661607495, + "grad_norm": 0.7845008969306946, + "learning_rate": 1.8159567585892521e-06, + "loss": 0.0611, + "num_input_tokens_seen": 5151936, + "step": 10480 + }, + { + "epoch": 1.383793057938498, + "grad_norm": 18.889646530151367, + "learning_rate": 1.8156903364620632e-06, + "loss": 0.2547, + "num_input_tokens_seen": 5154368, + "step": 10485 + }, + { + "epoch": 1.3844529497162466, + "grad_norm": 2.00492525100708, + "learning_rate": 1.8154237412105074e-06, + "loss": 0.0018, + "num_input_tokens_seen": 5156736, + "step": 10490 + }, + { + "epoch": 1.385112841493995, + "grad_norm": 24.936811447143555, + "learning_rate": 1.8151569728911672e-06, + "loss": 0.203, + "num_input_tokens_seen": 5159104, + "step": 10495 + }, + { + "epoch": 1.3857727332717436, + "grad_norm": 0.39635398983955383, + "learning_rate": 1.8148900315606625e-06, + "loss": 0.1597, + "num_input_tokens_seen": 5161472, + "step": 10500 + }, + { + "epoch": 1.3864326250494918, + "grad_norm": 0.1377970278263092, + "learning_rate": 1.8146229172756495e-06, + "loss": 0.0015, + "num_input_tokens_seen": 5163904, + "step": 10505 + }, + { + "epoch": 1.3870925168272403, + "grad_norm": 0.060544900596141815, + "learning_rate": 1.8143556300928214e-06, + "loss": 0.0844, + "num_input_tokens_seen": 5166464, + "step": 10510 + }, + { + "epoch": 1.3877524086049888, + "grad_norm": 0.048183392733335495, + "learning_rate": 1.814088170068908e-06, + "loss": 0.0516, + "num_input_tokens_seen": 5168704, + "step": 10515 + }, + { + "epoch": 1.388412300382737, + "grad_norm": 0.13653618097305298, + "learning_rate": 1.8138205372606756e-06, + "loss": 0.0833, + "num_input_tokens_seen": 5171200, + "step": 10520 + }, + { + "epoch": 1.3890721921604856, + "grad_norm": 0.6845338344573975, + "learning_rate": 1.8135527317249273e-06, + "loss": 0.0015, + "num_input_tokens_seen": 5173504, + "step": 10525 + }, + { + "epoch": 1.389732083938234, + "grad_norm": 32.81275177001953, + "learning_rate": 1.8132847535185029e-06, + "loss": 0.0479, + "num_input_tokens_seen": 5176064, + "step": 10530 + }, + { + "epoch": 1.3903919757159826, + "grad_norm": 0.12121167778968811, + "learning_rate": 1.8130166026982795e-06, + "loss": 0.0021, + "num_input_tokens_seen": 5178816, + "step": 10535 + }, + { + "epoch": 1.391051867493731, + "grad_norm": 0.03380153700709343, + "learning_rate": 1.8127482793211688e-06, + "loss": 0.0802, + "num_input_tokens_seen": 5181248, + "step": 10540 + }, + { + "epoch": 1.3917117592714794, + "grad_norm": 0.0383787527680397, + "learning_rate": 1.8124797834441217e-06, + "loss": 0.0009, + "num_input_tokens_seen": 5183552, + "step": 10545 + }, + { + "epoch": 1.3923716510492279, + "grad_norm": 0.15044409036636353, + "learning_rate": 1.812211115124124e-06, + "loss": 0.0769, + "num_input_tokens_seen": 5185728, + "step": 10550 + }, + { + "epoch": 1.3930315428269764, + "grad_norm": 0.009135999716818333, + "learning_rate": 1.8119422744181984e-06, + "loss": 0.0521, + "num_input_tokens_seen": 5188224, + "step": 10555 + }, + { + "epoch": 1.3936914346047249, + "grad_norm": 23.88149070739746, + "learning_rate": 1.8116732613834053e-06, + "loss": 0.1086, + "num_input_tokens_seen": 5191104, + "step": 10560 + }, + { + "epoch": 1.3943513263824734, + "grad_norm": 0.011752346530556679, + "learning_rate": 1.81140407607684e-06, + "loss": 0.1069, + "num_input_tokens_seen": 5193600, + "step": 10565 + }, + { + "epoch": 1.3950112181602217, + "grad_norm": 0.059345416724681854, + "learning_rate": 1.8111347185556348e-06, + "loss": 0.0569, + "num_input_tokens_seen": 5196032, + "step": 10570 + }, + { + "epoch": 1.3956711099379702, + "grad_norm": 0.07716865092515945, + "learning_rate": 1.8108651888769595e-06, + "loss": 0.0003, + "num_input_tokens_seen": 5198656, + "step": 10575 + }, + { + "epoch": 1.3963310017157187, + "grad_norm": 0.009854006581008434, + "learning_rate": 1.8105954870980198e-06, + "loss": 0.146, + "num_input_tokens_seen": 5200960, + "step": 10580 + }, + { + "epoch": 1.396990893493467, + "grad_norm": 0.023813385516405106, + "learning_rate": 1.810325613276058e-06, + "loss": 0.0001, + "num_input_tokens_seen": 5203520, + "step": 10585 + }, + { + "epoch": 1.3976507852712154, + "grad_norm": 0.16299042105674744, + "learning_rate": 1.8100555674683524e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5206144, + "step": 10590 + }, + { + "epoch": 1.398310677048964, + "grad_norm": 0.0172832403331995, + "learning_rate": 1.8097853497322188e-06, + "loss": 0.0002, + "num_input_tokens_seen": 5208768, + "step": 10595 + }, + { + "epoch": 1.3989705688267124, + "grad_norm": 0.24835462868213654, + "learning_rate": 1.8095149601250088e-06, + "loss": 0.0942, + "num_input_tokens_seen": 5211136, + "step": 10600 + }, + { + "epoch": 1.399630460604461, + "grad_norm": 0.06975623965263367, + "learning_rate": 1.8092443987041104e-06, + "loss": 0.066, + "num_input_tokens_seen": 5213504, + "step": 10605 + }, + { + "epoch": 1.4002903523822092, + "grad_norm": 0.04890378192067146, + "learning_rate": 1.8089736655269486e-06, + "loss": 0.0834, + "num_input_tokens_seen": 5216000, + "step": 10610 + }, + { + "epoch": 1.4009502441599577, + "grad_norm": 0.687415599822998, + "learning_rate": 1.8087027606509842e-06, + "loss": 0.14, + "num_input_tokens_seen": 5218688, + "step": 10615 + }, + { + "epoch": 1.4016101359377062, + "grad_norm": 0.25652775168418884, + "learning_rate": 1.808431684133715e-06, + "loss": 0.0022, + "num_input_tokens_seen": 5221440, + "step": 10620 + }, + { + "epoch": 1.4022700277154547, + "grad_norm": 1.2399516105651855, + "learning_rate": 1.8081604360326753e-06, + "loss": 0.1496, + "num_input_tokens_seen": 5223616, + "step": 10625 + }, + { + "epoch": 1.4029299194932032, + "grad_norm": 0.3370029330253601, + "learning_rate": 1.807889016405435e-06, + "loss": 0.0769, + "num_input_tokens_seen": 5226176, + "step": 10630 + }, + { + "epoch": 1.4035898112709515, + "grad_norm": 0.04719403013586998, + "learning_rate": 1.8076174253096014e-06, + "loss": 0.0013, + "num_input_tokens_seen": 5228480, + "step": 10635 + }, + { + "epoch": 1.4042497030487, + "grad_norm": 0.015282729640603065, + "learning_rate": 1.8073456628028177e-06, + "loss": 0.1115, + "num_input_tokens_seen": 5230912, + "step": 10640 + }, + { + "epoch": 1.4049095948264485, + "grad_norm": 21.17472267150879, + "learning_rate": 1.8070737289427631e-06, + "loss": 0.0883, + "num_input_tokens_seen": 5233536, + "step": 10645 + }, + { + "epoch": 1.4055694866041968, + "grad_norm": 0.05179164931178093, + "learning_rate": 1.8068016237871541e-06, + "loss": 0.0003, + "num_input_tokens_seen": 5236096, + "step": 10650 + }, + { + "epoch": 1.4062293783819453, + "grad_norm": 0.01727340929210186, + "learning_rate": 1.8065293473937429e-06, + "loss": 0.1611, + "num_input_tokens_seen": 5238464, + "step": 10655 + }, + { + "epoch": 1.4068892701596938, + "grad_norm": 87.70179748535156, + "learning_rate": 1.806256899820318e-06, + "loss": 0.3064, + "num_input_tokens_seen": 5241088, + "step": 10660 + }, + { + "epoch": 1.4075491619374423, + "grad_norm": 0.25517573952674866, + "learning_rate": 1.8059842811247048e-06, + "loss": 0.2462, + "num_input_tokens_seen": 5243584, + "step": 10665 + }, + { + "epoch": 1.4082090537151908, + "grad_norm": 0.09056201577186584, + "learning_rate": 1.805711491364764e-06, + "loss": 0.0303, + "num_input_tokens_seen": 5246016, + "step": 10670 + }, + { + "epoch": 1.408868945492939, + "grad_norm": 0.1641373485326767, + "learning_rate": 1.8054385305983942e-06, + "loss": 0.0026, + "num_input_tokens_seen": 5248192, + "step": 10675 + }, + { + "epoch": 1.4095288372706876, + "grad_norm": 107.500732421875, + "learning_rate": 1.8051653988835284e-06, + "loss": 0.1616, + "num_input_tokens_seen": 5250752, + "step": 10680 + }, + { + "epoch": 1.410188729048436, + "grad_norm": 0.016604578122496605, + "learning_rate": 1.8048920962781372e-06, + "loss": 0.1854, + "num_input_tokens_seen": 5253120, + "step": 10685 + }, + { + "epoch": 1.4108486208261846, + "grad_norm": 0.2926078736782074, + "learning_rate": 1.8046186228402273e-06, + "loss": 0.0559, + "num_input_tokens_seen": 5255808, + "step": 10690 + }, + { + "epoch": 1.411508512603933, + "grad_norm": 0.06410674005746841, + "learning_rate": 1.8043449786278413e-06, + "loss": 0.0009, + "num_input_tokens_seen": 5258112, + "step": 10695 + }, + { + "epoch": 1.4121684043816813, + "grad_norm": 20.56272315979004, + "learning_rate": 1.8040711636990581e-06, + "loss": 0.0902, + "num_input_tokens_seen": 5260800, + "step": 10700 + }, + { + "epoch": 1.4128282961594298, + "grad_norm": 0.3491075336933136, + "learning_rate": 1.8037971781119931e-06, + "loss": 0.0008, + "num_input_tokens_seen": 5263104, + "step": 10705 + }, + { + "epoch": 1.4134881879371783, + "grad_norm": 0.20114921033382416, + "learning_rate": 1.8035230219247977e-06, + "loss": 0.204, + "num_input_tokens_seen": 5265472, + "step": 10710 + }, + { + "epoch": 1.4141480797149266, + "grad_norm": 14.344511032104492, + "learning_rate": 1.8032486951956596e-06, + "loss": 0.0555, + "num_input_tokens_seen": 5268160, + "step": 10715 + }, + { + "epoch": 1.4148079714926751, + "grad_norm": 25.318134307861328, + "learning_rate": 1.8029741979828026e-06, + "loss": 0.115, + "num_input_tokens_seen": 5270400, + "step": 10720 + }, + { + "epoch": 1.4154678632704236, + "grad_norm": 0.02228490076959133, + "learning_rate": 1.8026995303444867e-06, + "loss": 0.0623, + "num_input_tokens_seen": 5272768, + "step": 10725 + }, + { + "epoch": 1.4161277550481721, + "grad_norm": 0.05160384625196457, + "learning_rate": 1.802424692339008e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5275584, + "step": 10730 + }, + { + "epoch": 1.4167876468259206, + "grad_norm": 0.05498448386788368, + "learning_rate": 1.8021496840246994e-06, + "loss": 0.0371, + "num_input_tokens_seen": 5277824, + "step": 10735 + }, + { + "epoch": 1.417447538603669, + "grad_norm": 0.020905766636133194, + "learning_rate": 1.8018745054599292e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5280512, + "step": 10740 + }, + { + "epoch": 1.4181074303814174, + "grad_norm": 0.14113740622997284, + "learning_rate": 1.8015991567031015e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5283136, + "step": 10745 + }, + { + "epoch": 1.418767322159166, + "grad_norm": 0.9651300311088562, + "learning_rate": 1.8013236378126577e-06, + "loss": 0.0802, + "num_input_tokens_seen": 5285568, + "step": 10750 + }, + { + "epoch": 1.4194272139369144, + "grad_norm": 0.0940876454114914, + "learning_rate": 1.8010479488470743e-06, + "loss": 0.0573, + "num_input_tokens_seen": 5287936, + "step": 10755 + }, + { + "epoch": 1.420087105714663, + "grad_norm": 0.052880752831697464, + "learning_rate": 1.8007720898648645e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5289984, + "step": 10760 + }, + { + "epoch": 1.4207469974924112, + "grad_norm": 13.744636535644531, + "learning_rate": 1.8004960609245778e-06, + "loss": 0.1727, + "num_input_tokens_seen": 5292352, + "step": 10765 + }, + { + "epoch": 1.4214068892701597, + "grad_norm": 0.0709216445684433, + "learning_rate": 1.8002198620847988e-06, + "loss": 0.0008, + "num_input_tokens_seen": 5294720, + "step": 10770 + }, + { + "epoch": 1.4220667810479082, + "grad_norm": 0.01790332980453968, + "learning_rate": 1.7999434934041485e-06, + "loss": 0.0007, + "num_input_tokens_seen": 5297024, + "step": 10775 + }, + { + "epoch": 1.4227266728256565, + "grad_norm": 0.0645064190030098, + "learning_rate": 1.7996669549412847e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5299584, + "step": 10780 + }, + { + "epoch": 1.4233865646034052, + "grad_norm": 0.03778925910592079, + "learning_rate": 1.7993902467549002e-06, + "loss": 0.1082, + "num_input_tokens_seen": 5301888, + "step": 10785 + }, + { + "epoch": 1.4240464563811535, + "grad_norm": 0.034117791801691055, + "learning_rate": 1.7991133689037247e-06, + "loss": 0.1271, + "num_input_tokens_seen": 5304256, + "step": 10790 + }, + { + "epoch": 1.424706348158902, + "grad_norm": 23.118024826049805, + "learning_rate": 1.7988363214465233e-06, + "loss": 0.1573, + "num_input_tokens_seen": 5306688, + "step": 10795 + }, + { + "epoch": 1.4253662399366505, + "grad_norm": 0.07332354784011841, + "learning_rate": 1.7985591044420975e-06, + "loss": 0.0624, + "num_input_tokens_seen": 5309248, + "step": 10800 + }, + { + "epoch": 1.4260261317143987, + "grad_norm": 0.08144375681877136, + "learning_rate": 1.7982817179492847e-06, + "loss": 0.001, + "num_input_tokens_seen": 5311552, + "step": 10805 + }, + { + "epoch": 1.4266860234921472, + "grad_norm": 0.09295511990785599, + "learning_rate": 1.7980041620269577e-06, + "loss": 0.0589, + "num_input_tokens_seen": 5314048, + "step": 10810 + }, + { + "epoch": 1.4273459152698957, + "grad_norm": 0.024405941367149353, + "learning_rate": 1.7977264367340262e-06, + "loss": 0.0665, + "num_input_tokens_seen": 5316480, + "step": 10815 + }, + { + "epoch": 1.4280058070476442, + "grad_norm": 0.023482978343963623, + "learning_rate": 1.7974485421294347e-06, + "loss": 0.0012, + "num_input_tokens_seen": 5318720, + "step": 10820 + }, + { + "epoch": 1.4286656988253927, + "grad_norm": 0.24007222056388855, + "learning_rate": 1.7971704782721652e-06, + "loss": 0.0552, + "num_input_tokens_seen": 5321344, + "step": 10825 + }, + { + "epoch": 1.429325590603141, + "grad_norm": 0.02005026862025261, + "learning_rate": 1.7968922452212342e-06, + "loss": 0.1752, + "num_input_tokens_seen": 5323584, + "step": 10830 + }, + { + "epoch": 1.4299854823808895, + "grad_norm": 53.96958923339844, + "learning_rate": 1.796613843035695e-06, + "loss": 0.2167, + "num_input_tokens_seen": 5326208, + "step": 10835 + }, + { + "epoch": 1.430645374158638, + "grad_norm": 28.23914909362793, + "learning_rate": 1.796335271774636e-06, + "loss": 0.0647, + "num_input_tokens_seen": 5328768, + "step": 10840 + }, + { + "epoch": 1.4313052659363863, + "grad_norm": 13.797289848327637, + "learning_rate": 1.7960565314971823e-06, + "loss": 0.1761, + "num_input_tokens_seen": 5331264, + "step": 10845 + }, + { + "epoch": 1.431965157714135, + "grad_norm": 0.43690750002861023, + "learning_rate": 1.7957776222624946e-06, + "loss": 0.0024, + "num_input_tokens_seen": 5333632, + "step": 10850 + }, + { + "epoch": 1.4326250494918833, + "grad_norm": 0.10744563490152359, + "learning_rate": 1.7954985441297684e-06, + "loss": 0.001, + "num_input_tokens_seen": 5336192, + "step": 10855 + }, + { + "epoch": 1.4332849412696318, + "grad_norm": 0.3047581613063812, + "learning_rate": 1.7952192971582374e-06, + "loss": 0.0111, + "num_input_tokens_seen": 5338496, + "step": 10860 + }, + { + "epoch": 1.4339448330473803, + "grad_norm": 26.489316940307617, + "learning_rate": 1.794939881407169e-06, + "loss": 0.0531, + "num_input_tokens_seen": 5340992, + "step": 10865 + }, + { + "epoch": 1.4346047248251286, + "grad_norm": 0.04672485962510109, + "learning_rate": 1.7946602969358673e-06, + "loss": 0.0015, + "num_input_tokens_seen": 5343552, + "step": 10870 + }, + { + "epoch": 1.435264616602877, + "grad_norm": 0.5452134609222412, + "learning_rate": 1.7943805438036718e-06, + "loss": 0.0022, + "num_input_tokens_seen": 5346176, + "step": 10875 + }, + { + "epoch": 1.4359245083806256, + "grad_norm": 0.16802646219730377, + "learning_rate": 1.7941006220699588e-06, + "loss": 0.0216, + "num_input_tokens_seen": 5348800, + "step": 10880 + }, + { + "epoch": 1.436584400158374, + "grad_norm": 0.010055284947156906, + "learning_rate": 1.7938205317941386e-06, + "loss": 0.0736, + "num_input_tokens_seen": 5351424, + "step": 10885 + }, + { + "epoch": 1.4372442919361226, + "grad_norm": 0.03935292735695839, + "learning_rate": 1.7935402730356594e-06, + "loss": 0.0003, + "num_input_tokens_seen": 5354048, + "step": 10890 + }, + { + "epoch": 1.4379041837138709, + "grad_norm": 0.11049910634756088, + "learning_rate": 1.7932598458540036e-06, + "loss": 0.0753, + "num_input_tokens_seen": 5356416, + "step": 10895 + }, + { + "epoch": 1.4385640754916194, + "grad_norm": 0.021031470969319344, + "learning_rate": 1.7929792503086897e-06, + "loss": 0.03, + "num_input_tokens_seen": 5358848, + "step": 10900 + }, + { + "epoch": 1.4392239672693679, + "grad_norm": 0.01888999529182911, + "learning_rate": 1.792698486459272e-06, + "loss": 0.0655, + "num_input_tokens_seen": 5361344, + "step": 10905 + }, + { + "epoch": 1.4398838590471164, + "grad_norm": 0.010403584688901901, + "learning_rate": 1.7924175543653411e-06, + "loss": 0.0519, + "num_input_tokens_seen": 5363904, + "step": 10910 + }, + { + "epoch": 1.4405437508248649, + "grad_norm": 11.720383644104004, + "learning_rate": 1.7921364540865224e-06, + "loss": 0.0833, + "num_input_tokens_seen": 5366144, + "step": 10915 + }, + { + "epoch": 1.4412036426026131, + "grad_norm": 0.011344925500452518, + "learning_rate": 1.7918551856824776e-06, + "loss": 0.0007, + "num_input_tokens_seen": 5368448, + "step": 10920 + }, + { + "epoch": 1.4418635343803616, + "grad_norm": 0.036906708031892776, + "learning_rate": 1.7915737492129037e-06, + "loss": 0.1, + "num_input_tokens_seen": 5370624, + "step": 10925 + }, + { + "epoch": 1.4425234261581101, + "grad_norm": 198.46531677246094, + "learning_rate": 1.7912921447375338e-06, + "loss": 0.0986, + "num_input_tokens_seen": 5373376, + "step": 10930 + }, + { + "epoch": 1.4431833179358584, + "grad_norm": 48.49536895751953, + "learning_rate": 1.7910103723161362e-06, + "loss": 0.1395, + "num_input_tokens_seen": 5376064, + "step": 10935 + }, + { + "epoch": 1.443843209713607, + "grad_norm": 0.03592463955283165, + "learning_rate": 1.7907284320085153e-06, + "loss": 0.0167, + "num_input_tokens_seen": 5378624, + "step": 10940 + }, + { + "epoch": 1.4445031014913554, + "grad_norm": 19.094186782836914, + "learning_rate": 1.7904463238745105e-06, + "loss": 0.1895, + "num_input_tokens_seen": 5381312, + "step": 10945 + }, + { + "epoch": 1.445162993269104, + "grad_norm": 0.653519332408905, + "learning_rate": 1.7901640479739974e-06, + "loss": 0.1904, + "num_input_tokens_seen": 5383616, + "step": 10950 + }, + { + "epoch": 1.4458228850468524, + "grad_norm": 1.8942596912384033, + "learning_rate": 1.789881604366887e-06, + "loss": 0.1212, + "num_input_tokens_seen": 5386368, + "step": 10955 + }, + { + "epoch": 1.4464827768246007, + "grad_norm": 0.04864390566945076, + "learning_rate": 1.7895989931131262e-06, + "loss": 0.1056, + "num_input_tokens_seen": 5388736, + "step": 10960 + }, + { + "epoch": 1.4471426686023492, + "grad_norm": 0.1541789174079895, + "learning_rate": 1.7893162142726967e-06, + "loss": 0.0437, + "num_input_tokens_seen": 5391232, + "step": 10965 + }, + { + "epoch": 1.4478025603800977, + "grad_norm": 0.09908580034971237, + "learning_rate": 1.7890332679056165e-06, + "loss": 0.0009, + "num_input_tokens_seen": 5393792, + "step": 10970 + }, + { + "epoch": 1.4484624521578462, + "grad_norm": 31.06180763244629, + "learning_rate": 1.7887501540719389e-06, + "loss": 0.1911, + "num_input_tokens_seen": 5396416, + "step": 10975 + }, + { + "epoch": 1.4491223439355947, + "grad_norm": 0.2622423768043518, + "learning_rate": 1.7884668728317531e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5399232, + "step": 10980 + }, + { + "epoch": 1.449782235713343, + "grad_norm": 0.028448861092329025, + "learning_rate": 1.7881834242451829e-06, + "loss": 0.0615, + "num_input_tokens_seen": 5401664, + "step": 10985 + }, + { + "epoch": 1.4504421274910915, + "grad_norm": 0.1454077512025833, + "learning_rate": 1.7878998083723883e-06, + "loss": 0.001, + "num_input_tokens_seen": 5404224, + "step": 10990 + }, + { + "epoch": 1.45110201926884, + "grad_norm": 26.754520416259766, + "learning_rate": 1.7876160252735652e-06, + "loss": 0.1332, + "num_input_tokens_seen": 5406336, + "step": 10995 + }, + { + "epoch": 1.4517619110465882, + "grad_norm": 0.21431031823158264, + "learning_rate": 1.7873320750089443e-06, + "loss": 0.0553, + "num_input_tokens_seen": 5408832, + "step": 11000 + }, + { + "epoch": 1.4524218028243367, + "grad_norm": 0.18584635853767395, + "learning_rate": 1.7870479576387916e-06, + "loss": 0.0357, + "num_input_tokens_seen": 5411136, + "step": 11005 + }, + { + "epoch": 1.4530816946020852, + "grad_norm": 0.03457614406943321, + "learning_rate": 1.7867636732234094e-06, + "loss": 0.1594, + "num_input_tokens_seen": 5413376, + "step": 11010 + }, + { + "epoch": 1.4537415863798338, + "grad_norm": 0.2158297598361969, + "learning_rate": 1.7864792218231348e-06, + "loss": 0.0837, + "num_input_tokens_seen": 5415680, + "step": 11015 + }, + { + "epoch": 1.4544014781575823, + "grad_norm": 0.02059786207973957, + "learning_rate": 1.7861946034983406e-06, + "loss": 0.066, + "num_input_tokens_seen": 5418112, + "step": 11020 + }, + { + "epoch": 1.4550613699353305, + "grad_norm": 1.214483618736267, + "learning_rate": 1.785909818309435e-06, + "loss": 0.0883, + "num_input_tokens_seen": 5420352, + "step": 11025 + }, + { + "epoch": 1.455721261713079, + "grad_norm": 40.515533447265625, + "learning_rate": 1.7856248663168616e-06, + "loss": 0.1528, + "num_input_tokens_seen": 5422720, + "step": 11030 + }, + { + "epoch": 1.4563811534908275, + "grad_norm": 0.14102889597415924, + "learning_rate": 1.7853397475810995e-06, + "loss": 0.1301, + "num_input_tokens_seen": 5425024, + "step": 11035 + }, + { + "epoch": 1.457041045268576, + "grad_norm": 0.02465779520571232, + "learning_rate": 1.7850544621626626e-06, + "loss": 0.0646, + "num_input_tokens_seen": 5427584, + "step": 11040 + }, + { + "epoch": 1.4577009370463245, + "grad_norm": 67.14161682128906, + "learning_rate": 1.7847690101221011e-06, + "loss": 0.2122, + "num_input_tokens_seen": 5430400, + "step": 11045 + }, + { + "epoch": 1.4583608288240728, + "grad_norm": 0.14253701269626617, + "learning_rate": 1.7844833915200001e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5432960, + "step": 11050 + }, + { + "epoch": 1.4590207206018213, + "grad_norm": 14.538985252380371, + "learning_rate": 1.7841976064169803e-06, + "loss": 0.0658, + "num_input_tokens_seen": 5435520, + "step": 11055 + }, + { + "epoch": 1.4596806123795698, + "grad_norm": 21.02281951904297, + "learning_rate": 1.7839116548736972e-06, + "loss": 0.1915, + "num_input_tokens_seen": 5438016, + "step": 11060 + }, + { + "epoch": 1.460340504157318, + "grad_norm": 0.13544169068336487, + "learning_rate": 1.7836255369508418e-06, + "loss": 0.0019, + "num_input_tokens_seen": 5440384, + "step": 11065 + }, + { + "epoch": 1.4610003959350666, + "grad_norm": 0.4773879945278168, + "learning_rate": 1.7833392527091409e-06, + "loss": 0.0514, + "num_input_tokens_seen": 5443072, + "step": 11070 + }, + { + "epoch": 1.461660287712815, + "grad_norm": 0.03239217400550842, + "learning_rate": 1.7830528022093559e-06, + "loss": 0.1296, + "num_input_tokens_seen": 5445760, + "step": 11075 + }, + { + "epoch": 1.4623201794905636, + "grad_norm": 0.09148470312356949, + "learning_rate": 1.7827661855122842e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5448192, + "step": 11080 + }, + { + "epoch": 1.462980071268312, + "grad_norm": 19.332427978515625, + "learning_rate": 1.7824794026787577e-06, + "loss": 0.0479, + "num_input_tokens_seen": 5450752, + "step": 11085 + }, + { + "epoch": 1.4636399630460604, + "grad_norm": 31.496519088745117, + "learning_rate": 1.7821924537696447e-06, + "loss": 0.0038, + "num_input_tokens_seen": 5453056, + "step": 11090 + }, + { + "epoch": 1.4642998548238089, + "grad_norm": 24.9755859375, + "learning_rate": 1.7819053388458474e-06, + "loss": 0.0636, + "num_input_tokens_seen": 5455808, + "step": 11095 + }, + { + "epoch": 1.4649597466015574, + "grad_norm": 0.020973796024918556, + "learning_rate": 1.781618057968304e-06, + "loss": 0.1838, + "num_input_tokens_seen": 5458240, + "step": 11100 + }, + { + "epoch": 1.4656196383793059, + "grad_norm": 0.009609062224626541, + "learning_rate": 1.7813306111979878e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5460480, + "step": 11105 + }, + { + "epoch": 1.4662795301570544, + "grad_norm": 34.70246124267578, + "learning_rate": 1.7810429985959075e-06, + "loss": 0.1695, + "num_input_tokens_seen": 5462592, + "step": 11110 + }, + { + "epoch": 1.4669394219348026, + "grad_norm": 0.018167927861213684, + "learning_rate": 1.7807552202231065e-06, + "loss": 0.3442, + "num_input_tokens_seen": 5464896, + "step": 11115 + }, + { + "epoch": 1.4675993137125511, + "grad_norm": 0.22668059170246124, + "learning_rate": 1.7804672761406636e-06, + "loss": 0.1338, + "num_input_tokens_seen": 5467264, + "step": 11120 + }, + { + "epoch": 1.4682592054902996, + "grad_norm": 0.05633705109357834, + "learning_rate": 1.7801791664096933e-06, + "loss": 0.0805, + "num_input_tokens_seen": 5469696, + "step": 11125 + }, + { + "epoch": 1.468919097268048, + "grad_norm": 0.2396361529827118, + "learning_rate": 1.7798908910913444e-06, + "loss": 0.0636, + "num_input_tokens_seen": 5472512, + "step": 11130 + }, + { + "epoch": 1.4695789890457964, + "grad_norm": 14.083260536193848, + "learning_rate": 1.7796024502468015e-06, + "loss": 0.0954, + "num_input_tokens_seen": 5475200, + "step": 11135 + }, + { + "epoch": 1.470238880823545, + "grad_norm": 15.633596420288086, + "learning_rate": 1.7793138439372839e-06, + "loss": 0.1199, + "num_input_tokens_seen": 5477568, + "step": 11140 + }, + { + "epoch": 1.4708987726012934, + "grad_norm": 0.11257128417491913, + "learning_rate": 1.7790250722240463e-06, + "loss": 0.0413, + "num_input_tokens_seen": 5480000, + "step": 11145 + }, + { + "epoch": 1.471558664379042, + "grad_norm": 1.0883387327194214, + "learning_rate": 1.7787361351683784e-06, + "loss": 0.0579, + "num_input_tokens_seen": 5482496, + "step": 11150 + }, + { + "epoch": 1.4722185561567902, + "grad_norm": 14.952766418457031, + "learning_rate": 1.7784470328316048e-06, + "loss": 0.1602, + "num_input_tokens_seen": 5484928, + "step": 11155 + }, + { + "epoch": 1.4728784479345387, + "grad_norm": 0.10626795142889023, + "learning_rate": 1.7781577652750858e-06, + "loss": 0.0452, + "num_input_tokens_seen": 5487296, + "step": 11160 + }, + { + "epoch": 1.4735383397122872, + "grad_norm": 0.1313232183456421, + "learning_rate": 1.777868332560216e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5489856, + "step": 11165 + }, + { + "epoch": 1.4741982314900357, + "grad_norm": 15.769591331481934, + "learning_rate": 1.7775787347484255e-06, + "loss": 0.0447, + "num_input_tokens_seen": 5492352, + "step": 11170 + }, + { + "epoch": 1.4748581232677842, + "grad_norm": 0.09785334020853043, + "learning_rate": 1.7772889719011793e-06, + "loss": 0.0842, + "num_input_tokens_seen": 5494912, + "step": 11175 + }, + { + "epoch": 1.4755180150455325, + "grad_norm": 0.1397729367017746, + "learning_rate": 1.7769990440799775e-06, + "loss": 0.0015, + "num_input_tokens_seen": 5497600, + "step": 11180 + }, + { + "epoch": 1.476177906823281, + "grad_norm": 30.313491821289062, + "learning_rate": 1.7767089513463552e-06, + "loss": 0.0355, + "num_input_tokens_seen": 5500352, + "step": 11185 + }, + { + "epoch": 1.4768377986010295, + "grad_norm": 17.191814422607422, + "learning_rate": 1.7764186937618826e-06, + "loss": 0.1102, + "num_input_tokens_seen": 5502592, + "step": 11190 + }, + { + "epoch": 1.4774976903787778, + "grad_norm": 15.721531867980957, + "learning_rate": 1.7761282713881645e-06, + "loss": 0.2163, + "num_input_tokens_seen": 5505280, + "step": 11195 + }, + { + "epoch": 1.4781575821565263, + "grad_norm": 0.026539819315075874, + "learning_rate": 1.775837684286841e-06, + "loss": 0.0617, + "num_input_tokens_seen": 5507968, + "step": 11200 + }, + { + "epoch": 1.4788174739342748, + "grad_norm": 0.09990093111991882, + "learning_rate": 1.7755469325195871e-06, + "loss": 0.2514, + "num_input_tokens_seen": 5510592, + "step": 11205 + }, + { + "epoch": 1.4794773657120233, + "grad_norm": 0.5558841824531555, + "learning_rate": 1.7752560161481131e-06, + "loss": 0.054, + "num_input_tokens_seen": 5512896, + "step": 11210 + }, + { + "epoch": 1.4801372574897718, + "grad_norm": 2.259077787399292, + "learning_rate": 1.7749649352341636e-06, + "loss": 0.0291, + "num_input_tokens_seen": 5515584, + "step": 11215 + }, + { + "epoch": 1.48079714926752, + "grad_norm": 0.06075366958975792, + "learning_rate": 1.7746736898395182e-06, + "loss": 0.1157, + "num_input_tokens_seen": 5517888, + "step": 11220 + }, + { + "epoch": 1.4814570410452685, + "grad_norm": 1.9688565731048584, + "learning_rate": 1.7743822800259923e-06, + "loss": 0.1393, + "num_input_tokens_seen": 5520320, + "step": 11225 + }, + { + "epoch": 1.482116932823017, + "grad_norm": 10.639870643615723, + "learning_rate": 1.7740907058554348e-06, + "loss": 0.1536, + "num_input_tokens_seen": 5522624, + "step": 11230 + }, + { + "epoch": 1.4827768246007655, + "grad_norm": 12.147008895874023, + "learning_rate": 1.7737989673897307e-06, + "loss": 0.2235, + "num_input_tokens_seen": 5524864, + "step": 11235 + }, + { + "epoch": 1.483436716378514, + "grad_norm": 14.560885429382324, + "learning_rate": 1.7735070646907988e-06, + "loss": 0.1008, + "num_input_tokens_seen": 5527488, + "step": 11240 + }, + { + "epoch": 1.4840966081562623, + "grad_norm": 14.701653480529785, + "learning_rate": 1.773214997820594e-06, + "loss": 0.0975, + "num_input_tokens_seen": 5529856, + "step": 11245 + }, + { + "epoch": 1.4847564999340108, + "grad_norm": 3.8046138286590576, + "learning_rate": 1.772922766841105e-06, + "loss": 0.0849, + "num_input_tokens_seen": 5532352, + "step": 11250 + }, + { + "epoch": 1.4854163917117593, + "grad_norm": 68.81758117675781, + "learning_rate": 1.772630371814356e-06, + "loss": 0.0448, + "num_input_tokens_seen": 5534976, + "step": 11255 + }, + { + "epoch": 1.4860762834895076, + "grad_norm": 220.65342712402344, + "learning_rate": 1.7723378128024056e-06, + "loss": 0.0766, + "num_input_tokens_seen": 5537408, + "step": 11260 + }, + { + "epoch": 1.486736175267256, + "grad_norm": 0.06636308878660202, + "learning_rate": 1.7720450898673468e-06, + "loss": 0.0378, + "num_input_tokens_seen": 5540224, + "step": 11265 + }, + { + "epoch": 1.4873960670450046, + "grad_norm": 0.06799861788749695, + "learning_rate": 1.7717522030713088e-06, + "loss": 0.2048, + "num_input_tokens_seen": 5542784, + "step": 11270 + }, + { + "epoch": 1.488055958822753, + "grad_norm": 0.1660117208957672, + "learning_rate": 1.771459152476454e-06, + "loss": 0.0836, + "num_input_tokens_seen": 5544896, + "step": 11275 + }, + { + "epoch": 1.4887158506005016, + "grad_norm": 1.1547398567199707, + "learning_rate": 1.7711659381449807e-06, + "loss": 0.0574, + "num_input_tokens_seen": 5547520, + "step": 11280 + }, + { + "epoch": 1.4893757423782499, + "grad_norm": 12.23236083984375, + "learning_rate": 1.7708725601391214e-06, + "loss": 0.1081, + "num_input_tokens_seen": 5549952, + "step": 11285 + }, + { + "epoch": 1.4900356341559984, + "grad_norm": 1.404454231262207, + "learning_rate": 1.7705790185211433e-06, + "loss": 0.0337, + "num_input_tokens_seen": 5552768, + "step": 11290 + }, + { + "epoch": 1.4906955259337469, + "grad_norm": 0.15022261440753937, + "learning_rate": 1.770285313353349e-06, + "loss": 0.0916, + "num_input_tokens_seen": 5555392, + "step": 11295 + }, + { + "epoch": 1.4913554177114954, + "grad_norm": 17.854000091552734, + "learning_rate": 1.7699914446980745e-06, + "loss": 0.1468, + "num_input_tokens_seen": 5557760, + "step": 11300 + }, + { + "epoch": 1.4920153094892439, + "grad_norm": 21.836078643798828, + "learning_rate": 1.7696974126176917e-06, + "loss": 0.0169, + "num_input_tokens_seen": 5560192, + "step": 11305 + }, + { + "epoch": 1.4926752012669922, + "grad_norm": 0.2405860722064972, + "learning_rate": 1.769403217174607e-06, + "loss": 0.042, + "num_input_tokens_seen": 5562496, + "step": 11310 + }, + { + "epoch": 1.4933350930447407, + "grad_norm": 0.07342782616615295, + "learning_rate": 1.7691088584312608e-06, + "loss": 0.128, + "num_input_tokens_seen": 5564992, + "step": 11315 + }, + { + "epoch": 1.4939949848224892, + "grad_norm": 0.11119920760393143, + "learning_rate": 1.7688143364501292e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5567616, + "step": 11320 + }, + { + "epoch": 1.4946548766002374, + "grad_norm": 0.9778904318809509, + "learning_rate": 1.7685196512937217e-06, + "loss": 0.001, + "num_input_tokens_seen": 5569984, + "step": 11325 + }, + { + "epoch": 1.495314768377986, + "grad_norm": 0.11220138520002365, + "learning_rate": 1.7682248030245836e-06, + "loss": 0.1185, + "num_input_tokens_seen": 5572160, + "step": 11330 + }, + { + "epoch": 1.4959746601557344, + "grad_norm": 0.01077987626194954, + "learning_rate": 1.7679297917052939e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5574400, + "step": 11335 + }, + { + "epoch": 1.496634551933483, + "grad_norm": 0.051964689046144485, + "learning_rate": 1.7676346173984669e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5577088, + "step": 11340 + }, + { + "epoch": 1.4972944437112314, + "grad_norm": 0.009416039101779461, + "learning_rate": 1.7673392801667513e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5579584, + "step": 11345 + }, + { + "epoch": 1.4979543354889797, + "grad_norm": 19.469966888427734, + "learning_rate": 1.7670437800728298e-06, + "loss": 0.2305, + "num_input_tokens_seen": 5581952, + "step": 11350 + }, + { + "epoch": 1.4986142272667282, + "grad_norm": 0.007671706844121218, + "learning_rate": 1.7667481171794205e-06, + "loss": 0.0002, + "num_input_tokens_seen": 5584448, + "step": 11355 + }, + { + "epoch": 1.4992741190444767, + "grad_norm": 0.07305540889501572, + "learning_rate": 1.7664522915492759e-06, + "loss": 0.0414, + "num_input_tokens_seen": 5587008, + "step": 11360 + }, + { + "epoch": 1.4999340108222252, + "grad_norm": 0.051261018961668015, + "learning_rate": 1.7661563032451827e-06, + "loss": 0.0593, + "num_input_tokens_seen": 5589312, + "step": 11365 + }, + { + "epoch": 1.5005939025999737, + "grad_norm": 28.68985366821289, + "learning_rate": 1.7658601523299619e-06, + "loss": 0.1073, + "num_input_tokens_seen": 5591680, + "step": 11370 + }, + { + "epoch": 1.5005939025999737, + "eval_loss": 0.12565796077251434, + "eval_runtime": 7.9581, + "eval_samples_per_second": 846.31, + "eval_steps_per_second": 105.804, + "num_input_tokens_seen": 5591680, + "step": 11370 + }, + { + "epoch": 1.501253794377722, + "grad_norm": 1.1727688312530518, + "learning_rate": 1.7655638388664698e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5594176, + "step": 11375 + }, + { + "epoch": 1.5019136861554705, + "grad_norm": 0.05231983959674835, + "learning_rate": 1.765267362917597e-06, + "loss": 0.0044, + "num_input_tokens_seen": 5597056, + "step": 11380 + }, + { + "epoch": 1.502573577933219, + "grad_norm": 1.1232495307922363, + "learning_rate": 1.7649707245462678e-06, + "loss": 0.0555, + "num_input_tokens_seen": 5599488, + "step": 11385 + }, + { + "epoch": 1.5032334697109673, + "grad_norm": 0.18011930584907532, + "learning_rate": 1.7646739238154416e-06, + "loss": 0.0716, + "num_input_tokens_seen": 5601856, + "step": 11390 + }, + { + "epoch": 1.503893361488716, + "grad_norm": 0.16445372998714447, + "learning_rate": 1.7643769607881126e-06, + "loss": 0.1088, + "num_input_tokens_seen": 5604736, + "step": 11395 + }, + { + "epoch": 1.5045532532664643, + "grad_norm": 0.0443299375474453, + "learning_rate": 1.7640798355273087e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5607168, + "step": 11400 + }, + { + "epoch": 1.5052131450442128, + "grad_norm": 0.009100513532757759, + "learning_rate": 1.7637825480960929e-06, + "loss": 0.0018, + "num_input_tokens_seen": 5609600, + "step": 11405 + }, + { + "epoch": 1.5058730368219613, + "grad_norm": 0.5021543502807617, + "learning_rate": 1.7634850985575623e-06, + "loss": 0.2186, + "num_input_tokens_seen": 5612032, + "step": 11410 + }, + { + "epoch": 1.5065329285997096, + "grad_norm": 0.1777099221944809, + "learning_rate": 1.7631874869748477e-06, + "loss": 0.0788, + "num_input_tokens_seen": 5614656, + "step": 11415 + }, + { + "epoch": 1.507192820377458, + "grad_norm": 14.347442626953125, + "learning_rate": 1.7628897134111163e-06, + "loss": 0.128, + "num_input_tokens_seen": 5616768, + "step": 11420 + }, + { + "epoch": 1.5078527121552066, + "grad_norm": 13.774393081665039, + "learning_rate": 1.762591777929567e-06, + "loss": 0.1947, + "num_input_tokens_seen": 5619008, + "step": 11425 + }, + { + "epoch": 1.5085126039329548, + "grad_norm": 0.05142497643828392, + "learning_rate": 1.7622936805934355e-06, + "loss": 0.1306, + "num_input_tokens_seen": 5621440, + "step": 11430 + }, + { + "epoch": 1.5091724957107036, + "grad_norm": 0.14459626376628876, + "learning_rate": 1.7619954214659901e-06, + "loss": 0.0579, + "num_input_tokens_seen": 5623872, + "step": 11435 + }, + { + "epoch": 1.5098323874884518, + "grad_norm": 0.2629461884498596, + "learning_rate": 1.7616970006105347e-06, + "loss": 0.0584, + "num_input_tokens_seen": 5626240, + "step": 11440 + }, + { + "epoch": 1.5104922792662003, + "grad_norm": 0.13801227509975433, + "learning_rate": 1.7613984180904065e-06, + "loss": 0.1526, + "num_input_tokens_seen": 5628544, + "step": 11445 + }, + { + "epoch": 1.5111521710439488, + "grad_norm": 0.15773025155067444, + "learning_rate": 1.7610996739689779e-06, + "loss": 0.116, + "num_input_tokens_seen": 5630912, + "step": 11450 + }, + { + "epoch": 1.5118120628216971, + "grad_norm": 0.0603664331138134, + "learning_rate": 1.7608007683096547e-06, + "loss": 0.1417, + "num_input_tokens_seen": 5633472, + "step": 11455 + }, + { + "epoch": 1.5124719545994458, + "grad_norm": 0.05194404348731041, + "learning_rate": 1.7605017011758778e-06, + "loss": 0.1697, + "num_input_tokens_seen": 5635712, + "step": 11460 + }, + { + "epoch": 1.5131318463771941, + "grad_norm": 0.4702714681625366, + "learning_rate": 1.7602024726311219e-06, + "loss": 0.0017, + "num_input_tokens_seen": 5638208, + "step": 11465 + }, + { + "epoch": 1.5137917381549426, + "grad_norm": 0.3294268250465393, + "learning_rate": 1.7599030827388963e-06, + "loss": 0.0026, + "num_input_tokens_seen": 5640832, + "step": 11470 + }, + { + "epoch": 1.5144516299326911, + "grad_norm": 17.308738708496094, + "learning_rate": 1.7596035315627442e-06, + "loss": 0.0513, + "num_input_tokens_seen": 5643200, + "step": 11475 + }, + { + "epoch": 1.5151115217104394, + "grad_norm": 0.08752676844596863, + "learning_rate": 1.7593038191662427e-06, + "loss": 0.0494, + "num_input_tokens_seen": 5645888, + "step": 11480 + }, + { + "epoch": 1.515771413488188, + "grad_norm": 0.21542641520500183, + "learning_rate": 1.7590039456130046e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5648128, + "step": 11485 + }, + { + "epoch": 1.5164313052659364, + "grad_norm": 0.0905633419752121, + "learning_rate": 1.758703910966675e-06, + "loss": 0.0801, + "num_input_tokens_seen": 5650624, + "step": 11490 + }, + { + "epoch": 1.5170911970436847, + "grad_norm": 1.459328532218933, + "learning_rate": 1.7584037152909344e-06, + "loss": 0.1932, + "num_input_tokens_seen": 5653056, + "step": 11495 + }, + { + "epoch": 1.5177510888214334, + "grad_norm": 0.011277738027274609, + "learning_rate": 1.7581033586494973e-06, + "loss": 0.0716, + "num_input_tokens_seen": 5655552, + "step": 11500 + }, + { + "epoch": 1.5184109805991817, + "grad_norm": 141.13853454589844, + "learning_rate": 1.757802841106112e-06, + "loss": 0.1644, + "num_input_tokens_seen": 5658112, + "step": 11505 + }, + { + "epoch": 1.5190708723769302, + "grad_norm": 0.05243814364075661, + "learning_rate": 1.7575021627245612e-06, + "loss": 0.0823, + "num_input_tokens_seen": 5660480, + "step": 11510 + }, + { + "epoch": 1.5197307641546787, + "grad_norm": 0.09922140091657639, + "learning_rate": 1.7572013235686618e-06, + "loss": 0.1695, + "num_input_tokens_seen": 5662848, + "step": 11515 + }, + { + "epoch": 1.520390655932427, + "grad_norm": 0.6299600601196289, + "learning_rate": 1.7569003237022647e-06, + "loss": 0.0332, + "num_input_tokens_seen": 5665600, + "step": 11520 + }, + { + "epoch": 1.5210505477101757, + "grad_norm": 92.7926254272461, + "learning_rate": 1.756599163189255e-06, + "loss": 0.0245, + "num_input_tokens_seen": 5667776, + "step": 11525 + }, + { + "epoch": 1.521710439487924, + "grad_norm": 11.94028377532959, + "learning_rate": 1.7562978420935516e-06, + "loss": 0.0693, + "num_input_tokens_seen": 5670400, + "step": 11530 + }, + { + "epoch": 1.5223703312656724, + "grad_norm": 0.1259056031703949, + "learning_rate": 1.755996360479108e-06, + "loss": 0.0025, + "num_input_tokens_seen": 5673152, + "step": 11535 + }, + { + "epoch": 1.523030223043421, + "grad_norm": 0.020230580121278763, + "learning_rate": 1.7556947184099115e-06, + "loss": 0.0894, + "num_input_tokens_seen": 5675648, + "step": 11540 + }, + { + "epoch": 1.5236901148211692, + "grad_norm": 0.01828363724052906, + "learning_rate": 1.7553929159499832e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5678144, + "step": 11545 + }, + { + "epoch": 1.5243500065989177, + "grad_norm": 0.06999734044075012, + "learning_rate": 1.755090953163379e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5680448, + "step": 11550 + }, + { + "epoch": 1.5250098983766662, + "grad_norm": 1.2670248746871948, + "learning_rate": 1.754788830114187e-06, + "loss": 0.0007, + "num_input_tokens_seen": 5683008, + "step": 11555 + }, + { + "epoch": 1.5256697901544147, + "grad_norm": 0.021599799394607544, + "learning_rate": 1.7544865468665325e-06, + "loss": 0.1216, + "num_input_tokens_seen": 5685632, + "step": 11560 + }, + { + "epoch": 1.5263296819321632, + "grad_norm": 27.43296241760254, + "learning_rate": 1.7541841034845714e-06, + "loss": 0.0914, + "num_input_tokens_seen": 5687936, + "step": 11565 + }, + { + "epoch": 1.5269895737099115, + "grad_norm": 0.059593282639980316, + "learning_rate": 1.753881500032496e-06, + "loss": 0.0879, + "num_input_tokens_seen": 5690560, + "step": 11570 + }, + { + "epoch": 1.52764946548766, + "grad_norm": 0.017369644716382027, + "learning_rate": 1.7535787365745314e-06, + "loss": 0.0659, + "num_input_tokens_seen": 5692928, + "step": 11575 + }, + { + "epoch": 1.5283093572654085, + "grad_norm": 137.04595947265625, + "learning_rate": 1.7532758131749367e-06, + "loss": 0.1278, + "num_input_tokens_seen": 5695232, + "step": 11580 + }, + { + "epoch": 1.5289692490431568, + "grad_norm": 0.39762821793556213, + "learning_rate": 1.7529727298980058e-06, + "loss": 0.0222, + "num_input_tokens_seen": 5697856, + "step": 11585 + }, + { + "epoch": 1.5296291408209055, + "grad_norm": 0.1500694751739502, + "learning_rate": 1.7526694868080654e-06, + "loss": 0.1518, + "num_input_tokens_seen": 5700544, + "step": 11590 + }, + { + "epoch": 1.5302890325986538, + "grad_norm": 0.22773054242134094, + "learning_rate": 1.752366083969477e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5702976, + "step": 11595 + }, + { + "epoch": 1.5309489243764023, + "grad_norm": 0.05045131593942642, + "learning_rate": 1.7520625214466352e-06, + "loss": 0.0432, + "num_input_tokens_seen": 5705600, + "step": 11600 + }, + { + "epoch": 1.5316088161541508, + "grad_norm": 0.05065304785966873, + "learning_rate": 1.7517587993039693e-06, + "loss": 0.157, + "num_input_tokens_seen": 5707968, + "step": 11605 + }, + { + "epoch": 1.532268707931899, + "grad_norm": 0.052952587604522705, + "learning_rate": 1.751454917605942e-06, + "loss": 0.1714, + "num_input_tokens_seen": 5710656, + "step": 11610 + }, + { + "epoch": 1.5329285997096476, + "grad_norm": 15.411102294921875, + "learning_rate": 1.7511508764170502e-06, + "loss": 0.1965, + "num_input_tokens_seen": 5712960, + "step": 11615 + }, + { + "epoch": 1.533588491487396, + "grad_norm": 0.49912622570991516, + "learning_rate": 1.7508466758018243e-06, + "loss": 0.1463, + "num_input_tokens_seen": 5715456, + "step": 11620 + }, + { + "epoch": 1.5342483832651446, + "grad_norm": 0.6242772936820984, + "learning_rate": 1.7505423158248285e-06, + "loss": 0.1403, + "num_input_tokens_seen": 5718080, + "step": 11625 + }, + { + "epoch": 1.534908275042893, + "grad_norm": 11.48921012878418, + "learning_rate": 1.750237796550661e-06, + "loss": 0.1222, + "num_input_tokens_seen": 5720448, + "step": 11630 + }, + { + "epoch": 1.5355681668206413, + "grad_norm": 0.16093257069587708, + "learning_rate": 1.7499331180439545e-06, + "loss": 0.004, + "num_input_tokens_seen": 5722816, + "step": 11635 + }, + { + "epoch": 1.5362280585983898, + "grad_norm": 0.8627403974533081, + "learning_rate": 1.749628280369374e-06, + "loss": 0.0761, + "num_input_tokens_seen": 5725184, + "step": 11640 + }, + { + "epoch": 1.5368879503761383, + "grad_norm": 2.9328296184539795, + "learning_rate": 1.7493232835916195e-06, + "loss": 0.0645, + "num_input_tokens_seen": 5727872, + "step": 11645 + }, + { + "epoch": 1.5375478421538866, + "grad_norm": 0.4395294785499573, + "learning_rate": 1.7490181277754238e-06, + "loss": 0.0513, + "num_input_tokens_seen": 5730560, + "step": 11650 + }, + { + "epoch": 1.5382077339316353, + "grad_norm": 0.03519681468605995, + "learning_rate": 1.748712812985555e-06, + "loss": 0.0013, + "num_input_tokens_seen": 5733056, + "step": 11655 + }, + { + "epoch": 1.5388676257093836, + "grad_norm": 0.04068261757493019, + "learning_rate": 1.7484073392868133e-06, + "loss": 0.0553, + "num_input_tokens_seen": 5735744, + "step": 11660 + }, + { + "epoch": 1.5395275174871321, + "grad_norm": 117.09519958496094, + "learning_rate": 1.7481017067440332e-06, + "loss": 0.2724, + "num_input_tokens_seen": 5738112, + "step": 11665 + }, + { + "epoch": 1.5401874092648806, + "grad_norm": 16.728363037109375, + "learning_rate": 1.7477959154220834e-06, + "loss": 0.0778, + "num_input_tokens_seen": 5740480, + "step": 11670 + }, + { + "epoch": 1.540847301042629, + "grad_norm": 0.05176861584186554, + "learning_rate": 1.7474899653858651e-06, + "loss": 0.1881, + "num_input_tokens_seen": 5742720, + "step": 11675 + }, + { + "epoch": 1.5415071928203774, + "grad_norm": 0.1164284497499466, + "learning_rate": 1.7471838567003153e-06, + "loss": 0.0014, + "num_input_tokens_seen": 5745088, + "step": 11680 + }, + { + "epoch": 1.542167084598126, + "grad_norm": 4.228572368621826, + "learning_rate": 1.746877589430402e-06, + "loss": 0.0763, + "num_input_tokens_seen": 5747328, + "step": 11685 + }, + { + "epoch": 1.5428269763758744, + "grad_norm": 23.04313850402832, + "learning_rate": 1.7465711636411288e-06, + "loss": 0.1275, + "num_input_tokens_seen": 5749952, + "step": 11690 + }, + { + "epoch": 1.543486868153623, + "grad_norm": 12.308265686035156, + "learning_rate": 1.746264579397533e-06, + "loss": 0.2444, + "num_input_tokens_seen": 5752512, + "step": 11695 + }, + { + "epoch": 1.5441467599313712, + "grad_norm": 0.12739183008670807, + "learning_rate": 1.7459578367646836e-06, + "loss": 0.0617, + "num_input_tokens_seen": 5755136, + "step": 11700 + }, + { + "epoch": 1.5448066517091197, + "grad_norm": 26.933069229125977, + "learning_rate": 1.7456509358076854e-06, + "loss": 0.0409, + "num_input_tokens_seen": 5757568, + "step": 11705 + }, + { + "epoch": 1.5454665434868682, + "grad_norm": 0.07969870418310165, + "learning_rate": 1.7453438765916758e-06, + "loss": 0.0776, + "num_input_tokens_seen": 5760000, + "step": 11710 + }, + { + "epoch": 1.5461264352646165, + "grad_norm": 0.0692417323589325, + "learning_rate": 1.7450366591818255e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5762496, + "step": 11715 + }, + { + "epoch": 1.5467863270423652, + "grad_norm": 13.611120223999023, + "learning_rate": 1.7447292836433393e-06, + "loss": 0.0578, + "num_input_tokens_seen": 5764992, + "step": 11720 + }, + { + "epoch": 1.5474462188201135, + "grad_norm": 0.0573265440762043, + "learning_rate": 1.744421750041456e-06, + "loss": 0.2353, + "num_input_tokens_seen": 5767552, + "step": 11725 + }, + { + "epoch": 1.548106110597862, + "grad_norm": 0.03308899700641632, + "learning_rate": 1.7441140584414466e-06, + "loss": 0.0704, + "num_input_tokens_seen": 5770496, + "step": 11730 + }, + { + "epoch": 1.5487660023756105, + "grad_norm": 0.15724174678325653, + "learning_rate": 1.7438062089086167e-06, + "loss": 0.0033, + "num_input_tokens_seen": 5772864, + "step": 11735 + }, + { + "epoch": 1.5494258941533587, + "grad_norm": 167.92340087890625, + "learning_rate": 1.7434982015083056e-06, + "loss": 0.13, + "num_input_tokens_seen": 5775360, + "step": 11740 + }, + { + "epoch": 1.5500857859311075, + "grad_norm": 0.12244913727045059, + "learning_rate": 1.743190036305885e-06, + "loss": 0.0007, + "num_input_tokens_seen": 5777728, + "step": 11745 + }, + { + "epoch": 1.5507456777088557, + "grad_norm": 10.200172424316406, + "learning_rate": 1.7428817133667607e-06, + "loss": 0.138, + "num_input_tokens_seen": 5780160, + "step": 11750 + }, + { + "epoch": 1.5514055694866042, + "grad_norm": 0.5580622553825378, + "learning_rate": 1.7425732327563724e-06, + "loss": 0.0649, + "num_input_tokens_seen": 5782656, + "step": 11755 + }, + { + "epoch": 1.5520654612643527, + "grad_norm": 0.1402365267276764, + "learning_rate": 1.742264594540193e-06, + "loss": 0.0009, + "num_input_tokens_seen": 5784832, + "step": 11760 + }, + { + "epoch": 1.552725353042101, + "grad_norm": 0.8441329002380371, + "learning_rate": 1.7419557987837282e-06, + "loss": 0.0474, + "num_input_tokens_seen": 5787392, + "step": 11765 + }, + { + "epoch": 1.5533852448198495, + "grad_norm": 0.062263913452625275, + "learning_rate": 1.7416468455525179e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5790080, + "step": 11770 + }, + { + "epoch": 1.554045136597598, + "grad_norm": 0.06264446675777435, + "learning_rate": 1.7413377349121353e-06, + "loss": 0.1212, + "num_input_tokens_seen": 5792576, + "step": 11775 + }, + { + "epoch": 1.5547050283753463, + "grad_norm": 37.131507873535156, + "learning_rate": 1.7410284669281868e-06, + "loss": 0.0603, + "num_input_tokens_seen": 5795136, + "step": 11780 + }, + { + "epoch": 1.555364920153095, + "grad_norm": 23.096406936645508, + "learning_rate": 1.7407190416663124e-06, + "loss": 0.1844, + "num_input_tokens_seen": 5797568, + "step": 11785 + }, + { + "epoch": 1.5560248119308433, + "grad_norm": 0.10751156508922577, + "learning_rate": 1.7404094591921852e-06, + "loss": 0.068, + "num_input_tokens_seen": 5799808, + "step": 11790 + }, + { + "epoch": 1.5566847037085918, + "grad_norm": 20.53606605529785, + "learning_rate": 1.740099719571512e-06, + "loss": 0.0521, + "num_input_tokens_seen": 5802240, + "step": 11795 + }, + { + "epoch": 1.5573445954863403, + "grad_norm": 0.07454346865415573, + "learning_rate": 1.7397898228700324e-06, + "loss": 0.0007, + "num_input_tokens_seen": 5804480, + "step": 11800 + }, + { + "epoch": 1.5580044872640886, + "grad_norm": 0.377360075712204, + "learning_rate": 1.7394797691535203e-06, + "loss": 0.1066, + "num_input_tokens_seen": 5806912, + "step": 11805 + }, + { + "epoch": 1.5586643790418373, + "grad_norm": 18.144367218017578, + "learning_rate": 1.739169558487782e-06, + "loss": 0.0599, + "num_input_tokens_seen": 5809152, + "step": 11810 + }, + { + "epoch": 1.5593242708195856, + "grad_norm": 229.2332000732422, + "learning_rate": 1.7388591909386575e-06, + "loss": 0.033, + "num_input_tokens_seen": 5811712, + "step": 11815 + }, + { + "epoch": 1.559984162597334, + "grad_norm": 0.1694110631942749, + "learning_rate": 1.7385486665720203e-06, + "loss": 0.1292, + "num_input_tokens_seen": 5814144, + "step": 11820 + }, + { + "epoch": 1.5606440543750826, + "grad_norm": 0.32974788546562195, + "learning_rate": 1.7382379854537767e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5816512, + "step": 11825 + }, + { + "epoch": 1.5613039461528309, + "grad_norm": 0.3102016746997833, + "learning_rate": 1.7379271476498666e-06, + "loss": 0.17, + "num_input_tokens_seen": 5819072, + "step": 11830 + }, + { + "epoch": 1.5619638379305794, + "grad_norm": 16.128957748413086, + "learning_rate": 1.737616153226263e-06, + "loss": 0.3636, + "num_input_tokens_seen": 5821632, + "step": 11835 + }, + { + "epoch": 1.5626237297083279, + "grad_norm": 0.35809439420700073, + "learning_rate": 1.7373050022489722e-06, + "loss": 0.0012, + "num_input_tokens_seen": 5824000, + "step": 11840 + }, + { + "epoch": 1.5632836214860761, + "grad_norm": 305.5896301269531, + "learning_rate": 1.736993694784034e-06, + "loss": 0.1184, + "num_input_tokens_seen": 5826304, + "step": 11845 + }, + { + "epoch": 1.5639435132638249, + "grad_norm": 55.598350524902344, + "learning_rate": 1.736682230897521e-06, + "loss": 0.0028, + "num_input_tokens_seen": 5828608, + "step": 11850 + }, + { + "epoch": 1.5646034050415731, + "grad_norm": 0.09117922931909561, + "learning_rate": 1.7363706106555388e-06, + "loss": 0.0607, + "num_input_tokens_seen": 5831232, + "step": 11855 + }, + { + "epoch": 1.5652632968193216, + "grad_norm": 0.2058306783437729, + "learning_rate": 1.7360588341242273e-06, + "loss": 0.1293, + "num_input_tokens_seen": 5833664, + "step": 11860 + }, + { + "epoch": 1.5659231885970701, + "grad_norm": 20.225988388061523, + "learning_rate": 1.7357469013697582e-06, + "loss": 0.047, + "num_input_tokens_seen": 5835968, + "step": 11865 + }, + { + "epoch": 1.5665830803748184, + "grad_norm": 0.11420862376689911, + "learning_rate": 1.735434812458337e-06, + "loss": 0.05, + "num_input_tokens_seen": 5838464, + "step": 11870 + }, + { + "epoch": 1.5672429721525671, + "grad_norm": 0.06262335926294327, + "learning_rate": 1.7351225674562023e-06, + "loss": 0.0951, + "num_input_tokens_seen": 5840768, + "step": 11875 + }, + { + "epoch": 1.5679028639303154, + "grad_norm": 0.4882711172103882, + "learning_rate": 1.7348101664296265e-06, + "loss": 0.0417, + "num_input_tokens_seen": 5843328, + "step": 11880 + }, + { + "epoch": 1.568562755708064, + "grad_norm": 25.356002807617188, + "learning_rate": 1.7344976094449138e-06, + "loss": 0.0526, + "num_input_tokens_seen": 5845824, + "step": 11885 + }, + { + "epoch": 1.5692226474858124, + "grad_norm": 0.4536149799823761, + "learning_rate": 1.734184896568402e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5848000, + "step": 11890 + }, + { + "epoch": 1.5698825392635607, + "grad_norm": 16.68556785583496, + "learning_rate": 1.7338720278664627e-06, + "loss": 0.1243, + "num_input_tokens_seen": 5850432, + "step": 11895 + }, + { + "epoch": 1.5705424310413092, + "grad_norm": 0.03905687853693962, + "learning_rate": 1.7335590034054997e-06, + "loss": 0.0003, + "num_input_tokens_seen": 5852800, + "step": 11900 + }, + { + "epoch": 1.5712023228190577, + "grad_norm": 32.980892181396484, + "learning_rate": 1.7332458232519502e-06, + "loss": 0.0568, + "num_input_tokens_seen": 5855104, + "step": 11905 + }, + { + "epoch": 1.571862214596806, + "grad_norm": 0.020980341359972954, + "learning_rate": 1.7329324874722846e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5857664, + "step": 11910 + }, + { + "epoch": 1.5725221063745547, + "grad_norm": 0.1890995353460312, + "learning_rate": 1.7326189961330058e-06, + "loss": 0.0007, + "num_input_tokens_seen": 5859904, + "step": 11915 + }, + { + "epoch": 1.573181998152303, + "grad_norm": 0.3043166995048523, + "learning_rate": 1.7323053493006505e-06, + "loss": 0.0809, + "num_input_tokens_seen": 5862080, + "step": 11920 + }, + { + "epoch": 1.5738418899300515, + "grad_norm": 0.31387296319007874, + "learning_rate": 1.7319915470417876e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5864384, + "step": 11925 + }, + { + "epoch": 1.5745017817078, + "grad_norm": 0.07397419214248657, + "learning_rate": 1.7316775894230197e-06, + "loss": 0.191, + "num_input_tokens_seen": 5866752, + "step": 11930 + }, + { + "epoch": 1.5751616734855483, + "grad_norm": 0.2072152942419052, + "learning_rate": 1.7313634765109816e-06, + "loss": 0.0737, + "num_input_tokens_seen": 5869248, + "step": 11935 + }, + { + "epoch": 1.575821565263297, + "grad_norm": 0.016625676304101944, + "learning_rate": 1.731049208372342e-06, + "loss": 0.0554, + "num_input_tokens_seen": 5871872, + "step": 11940 + }, + { + "epoch": 1.5764814570410453, + "grad_norm": 0.14236846566200256, + "learning_rate": 1.7307347850738014e-06, + "loss": 0.1984, + "num_input_tokens_seen": 5874176, + "step": 11945 + }, + { + "epoch": 1.5771413488187938, + "grad_norm": 0.07960677891969681, + "learning_rate": 1.7304202066820945e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5876480, + "step": 11950 + }, + { + "epoch": 1.5778012405965423, + "grad_norm": 0.4623545706272125, + "learning_rate": 1.7301054732639882e-06, + "loss": 0.0588, + "num_input_tokens_seen": 5879104, + "step": 11955 + }, + { + "epoch": 1.5784611323742905, + "grad_norm": 182.28273010253906, + "learning_rate": 1.729790584886282e-06, + "loss": 0.1775, + "num_input_tokens_seen": 5881856, + "step": 11960 + }, + { + "epoch": 1.579121024152039, + "grad_norm": 0.05107644945383072, + "learning_rate": 1.7294755416158089e-06, + "loss": 0.0426, + "num_input_tokens_seen": 5884416, + "step": 11965 + }, + { + "epoch": 1.5797809159297875, + "grad_norm": 0.07986666262149811, + "learning_rate": 1.7291603435194344e-06, + "loss": 0.1012, + "num_input_tokens_seen": 5887040, + "step": 11970 + }, + { + "epoch": 1.5804408077075358, + "grad_norm": 1.2375479936599731, + "learning_rate": 1.7288449906640571e-06, + "loss": 0.001, + "num_input_tokens_seen": 5889536, + "step": 11975 + }, + { + "epoch": 1.5811006994852845, + "grad_norm": 0.03987383469939232, + "learning_rate": 1.7285294831166087e-06, + "loss": 0.1094, + "num_input_tokens_seen": 5891712, + "step": 11980 + }, + { + "epoch": 1.5817605912630328, + "grad_norm": 0.13992907106876373, + "learning_rate": 1.728213820944053e-06, + "loss": 0.0588, + "num_input_tokens_seen": 5894016, + "step": 11985 + }, + { + "epoch": 1.5824204830407813, + "grad_norm": 0.28587380051612854, + "learning_rate": 1.727898004213387e-06, + "loss": 0.0014, + "num_input_tokens_seen": 5896320, + "step": 11990 + }, + { + "epoch": 1.5830803748185298, + "grad_norm": 40.24662399291992, + "learning_rate": 1.7275820329916408e-06, + "loss": 0.1045, + "num_input_tokens_seen": 5898880, + "step": 11995 + }, + { + "epoch": 1.583740266596278, + "grad_norm": 11.798562049865723, + "learning_rate": 1.7272659073458766e-06, + "loss": 0.1356, + "num_input_tokens_seen": 5901632, + "step": 12000 + }, + { + "epoch": 1.5844001583740268, + "grad_norm": 0.6922625303268433, + "learning_rate": 1.7269496273431903e-06, + "loss": 0.0449, + "num_input_tokens_seen": 5903936, + "step": 12005 + }, + { + "epoch": 1.585060050151775, + "grad_norm": 0.17535802721977234, + "learning_rate": 1.7266331930507097e-06, + "loss": 0.0552, + "num_input_tokens_seen": 5906176, + "step": 12010 + }, + { + "epoch": 1.5857199419295236, + "grad_norm": 0.09550785273313522, + "learning_rate": 1.7263166045355954e-06, + "loss": 0.0456, + "num_input_tokens_seen": 5908864, + "step": 12015 + }, + { + "epoch": 1.586379833707272, + "grad_norm": 0.042160920798778534, + "learning_rate": 1.7259998618650418e-06, + "loss": 0.0356, + "num_input_tokens_seen": 5911424, + "step": 12020 + }, + { + "epoch": 1.5870397254850204, + "grad_norm": 0.1057129055261612, + "learning_rate": 1.7256829651062745e-06, + "loss": 0.0003, + "num_input_tokens_seen": 5913920, + "step": 12025 + }, + { + "epoch": 1.5876996172627689, + "grad_norm": 15.842555046081543, + "learning_rate": 1.725365914326553e-06, + "loss": 0.2519, + "num_input_tokens_seen": 5916160, + "step": 12030 + }, + { + "epoch": 1.5883595090405174, + "grad_norm": 0.04782838001847267, + "learning_rate": 1.7250487095931687e-06, + "loss": 0.0035, + "num_input_tokens_seen": 5918656, + "step": 12035 + }, + { + "epoch": 1.5890194008182656, + "grad_norm": 0.644459068775177, + "learning_rate": 1.7247313509734465e-06, + "loss": 0.0008, + "num_input_tokens_seen": 5921088, + "step": 12040 + }, + { + "epoch": 1.5896792925960144, + "grad_norm": 0.4349988102912903, + "learning_rate": 1.7244138385347429e-06, + "loss": 0.0678, + "num_input_tokens_seen": 5923456, + "step": 12045 + }, + { + "epoch": 1.5903391843737626, + "grad_norm": 0.07780405879020691, + "learning_rate": 1.7240961723444479e-06, + "loss": 0.0005, + "num_input_tokens_seen": 5925888, + "step": 12050 + }, + { + "epoch": 1.5909990761515111, + "grad_norm": 0.47382500767707825, + "learning_rate": 1.7237783524699836e-06, + "loss": 0.0975, + "num_input_tokens_seen": 5928384, + "step": 12055 + }, + { + "epoch": 1.5916589679292596, + "grad_norm": 0.043678492307662964, + "learning_rate": 1.7234603789788054e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5930880, + "step": 12060 + }, + { + "epoch": 1.592318859707008, + "grad_norm": 0.39742064476013184, + "learning_rate": 1.7231422519384008e-06, + "loss": 0.0004, + "num_input_tokens_seen": 5933184, + "step": 12065 + }, + { + "epoch": 1.5929787514847566, + "grad_norm": 0.02984798699617386, + "learning_rate": 1.7228239714162895e-06, + "loss": 0.2659, + "num_input_tokens_seen": 5935744, + "step": 12070 + }, + { + "epoch": 1.593638643262505, + "grad_norm": 0.26426124572753906, + "learning_rate": 1.7225055374800249e-06, + "loss": 0.0666, + "num_input_tokens_seen": 5938304, + "step": 12075 + }, + { + "epoch": 1.5942985350402534, + "grad_norm": 0.2540627717971802, + "learning_rate": 1.7221869501971917e-06, + "loss": 0.0895, + "num_input_tokens_seen": 5940992, + "step": 12080 + }, + { + "epoch": 1.594958426818002, + "grad_norm": 0.1360429972410202, + "learning_rate": 1.721868209635408e-06, + "loss": 0.0722, + "num_input_tokens_seen": 5943552, + "step": 12085 + }, + { + "epoch": 1.5956183185957502, + "grad_norm": 0.08838418126106262, + "learning_rate": 1.7215493158623242e-06, + "loss": 0.0022, + "num_input_tokens_seen": 5946176, + "step": 12090 + }, + { + "epoch": 1.5962782103734987, + "grad_norm": 0.19462290406227112, + "learning_rate": 1.7212302689456234e-06, + "loss": 0.0013, + "num_input_tokens_seen": 5948800, + "step": 12095 + }, + { + "epoch": 1.5969381021512472, + "grad_norm": 51.29019546508789, + "learning_rate": 1.72091106895302e-06, + "loss": 0.0489, + "num_input_tokens_seen": 5951296, + "step": 12100 + }, + { + "epoch": 1.5975979939289955, + "grad_norm": 14.03069019317627, + "learning_rate": 1.7205917159522635e-06, + "loss": 0.0734, + "num_input_tokens_seen": 5953600, + "step": 12105 + }, + { + "epoch": 1.5982578857067442, + "grad_norm": 0.015758154913783073, + "learning_rate": 1.7202722100111328e-06, + "loss": 0.0671, + "num_input_tokens_seen": 5956224, + "step": 12110 + }, + { + "epoch": 1.5989177774844925, + "grad_norm": 14.346113204956055, + "learning_rate": 1.7199525511974417e-06, + "loss": 0.1586, + "num_input_tokens_seen": 5958656, + "step": 12115 + }, + { + "epoch": 1.599577669262241, + "grad_norm": 0.01834205538034439, + "learning_rate": 1.7196327395790352e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5961088, + "step": 12120 + }, + { + "epoch": 1.6002375610399895, + "grad_norm": 13.146003723144531, + "learning_rate": 1.7193127752237906e-06, + "loss": 0.1002, + "num_input_tokens_seen": 5963520, + "step": 12125 + }, + { + "epoch": 1.6008974528177378, + "grad_norm": 0.1535753607749939, + "learning_rate": 1.7189926581996184e-06, + "loss": 0.0006, + "num_input_tokens_seen": 5965888, + "step": 12130 + }, + { + "epoch": 1.6015573445954865, + "grad_norm": 2.0366768836975098, + "learning_rate": 1.7186723885744609e-06, + "loss": 0.0669, + "num_input_tokens_seen": 5968064, + "step": 12135 + }, + { + "epoch": 1.6022172363732348, + "grad_norm": 1.0037025213241577, + "learning_rate": 1.7183519664162934e-06, + "loss": 0.138, + "num_input_tokens_seen": 5970560, + "step": 12140 + }, + { + "epoch": 1.6028771281509833, + "grad_norm": 0.06006058678030968, + "learning_rate": 1.7180313917931226e-06, + "loss": 0.1324, + "num_input_tokens_seen": 5973248, + "step": 12145 + }, + { + "epoch": 1.6035370199287318, + "grad_norm": 0.02915399707853794, + "learning_rate": 1.717710664772989e-06, + "loss": 0.0511, + "num_input_tokens_seen": 5975808, + "step": 12150 + }, + { + "epoch": 1.60419691170648, + "grad_norm": 0.09983984380960464, + "learning_rate": 1.7173897854239635e-06, + "loss": 0.0011, + "num_input_tokens_seen": 5978176, + "step": 12155 + }, + { + "epoch": 1.6048568034842285, + "grad_norm": 0.39569634199142456, + "learning_rate": 1.7170687538141512e-06, + "loss": 0.0115, + "num_input_tokens_seen": 5980608, + "step": 12160 + }, + { + "epoch": 1.605516695261977, + "grad_norm": 0.05424243211746216, + "learning_rate": 1.7167475700116882e-06, + "loss": 0.0712, + "num_input_tokens_seen": 5983424, + "step": 12165 + }, + { + "epoch": 1.6061765870397253, + "grad_norm": 0.008268141187727451, + "learning_rate": 1.7164262340847442e-06, + "loss": 0.1335, + "num_input_tokens_seen": 5985728, + "step": 12170 + }, + { + "epoch": 1.606836478817474, + "grad_norm": 0.09651245921850204, + "learning_rate": 1.71610474610152e-06, + "loss": 0.0008, + "num_input_tokens_seen": 5988288, + "step": 12175 + }, + { + "epoch": 1.6074963705952223, + "grad_norm": 0.19663313031196594, + "learning_rate": 1.7157831061302485e-06, + "loss": 0.0775, + "num_input_tokens_seen": 5991232, + "step": 12180 + }, + { + "epoch": 1.6081562623729708, + "grad_norm": 0.14674024283885956, + "learning_rate": 1.7154613142391968e-06, + "loss": 0.0946, + "num_input_tokens_seen": 5993728, + "step": 12185 + }, + { + "epoch": 1.6088161541507193, + "grad_norm": 0.055165428668260574, + "learning_rate": 1.7151393704966616e-06, + "loss": 0.0603, + "num_input_tokens_seen": 5996288, + "step": 12190 + }, + { + "epoch": 1.6094760459284676, + "grad_norm": 0.08553388714790344, + "learning_rate": 1.7148172749709736e-06, + "loss": 0.1577, + "num_input_tokens_seen": 5998848, + "step": 12195 + }, + { + "epoch": 1.6101359377062163, + "grad_norm": 0.19394369423389435, + "learning_rate": 1.7144950277304955e-06, + "loss": 0.0006, + "num_input_tokens_seen": 6001728, + "step": 12200 + }, + { + "epoch": 1.6107958294839646, + "grad_norm": 0.3518393337726593, + "learning_rate": 1.7141726288436216e-06, + "loss": 0.0347, + "num_input_tokens_seen": 6004480, + "step": 12205 + }, + { + "epoch": 1.611455721261713, + "grad_norm": 0.23870500922203064, + "learning_rate": 1.713850078378779e-06, + "loss": 0.0302, + "num_input_tokens_seen": 6006912, + "step": 12210 + }, + { + "epoch": 1.6121156130394616, + "grad_norm": 0.05858458951115608, + "learning_rate": 1.7135273764044262e-06, + "loss": 0.1457, + "num_input_tokens_seen": 6009344, + "step": 12215 + }, + { + "epoch": 1.6127755048172099, + "grad_norm": 0.08137746900320053, + "learning_rate": 1.7132045229890552e-06, + "loss": 0.0659, + "num_input_tokens_seen": 6011776, + "step": 12220 + }, + { + "epoch": 1.6134353965949584, + "grad_norm": 0.02841508761048317, + "learning_rate": 1.7128815182011886e-06, + "loss": 0.1002, + "num_input_tokens_seen": 6014080, + "step": 12225 + }, + { + "epoch": 1.6140952883727069, + "grad_norm": 20.118602752685547, + "learning_rate": 1.7125583621093819e-06, + "loss": 0.1825, + "num_input_tokens_seen": 6016640, + "step": 12230 + }, + { + "epoch": 1.6147551801504552, + "grad_norm": 0.10016603022813797, + "learning_rate": 1.712235054782223e-06, + "loss": 0.0623, + "num_input_tokens_seen": 6018816, + "step": 12235 + }, + { + "epoch": 1.6154150719282039, + "grad_norm": 6.060418128967285, + "learning_rate": 1.7119115962883313e-06, + "loss": 0.001, + "num_input_tokens_seen": 6021312, + "step": 12240 + }, + { + "epoch": 1.6160749637059522, + "grad_norm": 0.0981973260641098, + "learning_rate": 1.7115879866963586e-06, + "loss": 0.0006, + "num_input_tokens_seen": 6023616, + "step": 12245 + }, + { + "epoch": 1.6167348554837007, + "grad_norm": 0.24712832272052765, + "learning_rate": 1.7112642260749885e-06, + "loss": 0.1983, + "num_input_tokens_seen": 6026112, + "step": 12250 + }, + { + "epoch": 1.6173947472614492, + "grad_norm": 0.10543973743915558, + "learning_rate": 1.7109403144929369e-06, + "loss": 0.0004, + "num_input_tokens_seen": 6028544, + "step": 12255 + }, + { + "epoch": 1.6180546390391974, + "grad_norm": 0.06185751035809517, + "learning_rate": 1.7106162520189522e-06, + "loss": 0.0435, + "num_input_tokens_seen": 6031104, + "step": 12260 + }, + { + "epoch": 1.6187145308169462, + "grad_norm": 0.056501973420381546, + "learning_rate": 1.7102920387218136e-06, + "loss": 0.1353, + "num_input_tokens_seen": 6033728, + "step": 12265 + }, + { + "epoch": 1.6193744225946944, + "grad_norm": 11.367016792297363, + "learning_rate": 1.7099676746703332e-06, + "loss": 0.2136, + "num_input_tokens_seen": 6036352, + "step": 12270 + }, + { + "epoch": 1.620034314372443, + "grad_norm": 12.27001667022705, + "learning_rate": 1.7096431599333552e-06, + "loss": 0.1958, + "num_input_tokens_seen": 6038912, + "step": 12275 + }, + { + "epoch": 1.6206942061501914, + "grad_norm": 0.5031221508979797, + "learning_rate": 1.709318494579755e-06, + "loss": 0.0934, + "num_input_tokens_seen": 6041472, + "step": 12280 + }, + { + "epoch": 1.6213540979279397, + "grad_norm": 0.1668480783700943, + "learning_rate": 1.7089936786784414e-06, + "loss": 0.0024, + "num_input_tokens_seen": 6043584, + "step": 12285 + }, + { + "epoch": 1.6220139897056882, + "grad_norm": 0.12070733308792114, + "learning_rate": 1.708668712298353e-06, + "loss": 0.059, + "num_input_tokens_seen": 6046144, + "step": 12290 + }, + { + "epoch": 1.6226738814834367, + "grad_norm": 0.07423515617847443, + "learning_rate": 1.7083435955084627e-06, + "loss": 0.0014, + "num_input_tokens_seen": 6048512, + "step": 12295 + }, + { + "epoch": 1.6233337732611852, + "grad_norm": 0.13812686502933502, + "learning_rate": 1.7080183283777733e-06, + "loss": 0.0526, + "num_input_tokens_seen": 6050560, + "step": 12300 + }, + { + "epoch": 1.6239936650389337, + "grad_norm": 0.06522795557975769, + "learning_rate": 1.707692910975321e-06, + "loss": 0.0014, + "num_input_tokens_seen": 6053056, + "step": 12305 + }, + { + "epoch": 1.624653556816682, + "grad_norm": 0.1586804836988449, + "learning_rate": 1.7073673433701733e-06, + "loss": 0.0247, + "num_input_tokens_seen": 6055616, + "step": 12310 + }, + { + "epoch": 1.6253134485944305, + "grad_norm": 33.24040603637695, + "learning_rate": 1.7070416256314286e-06, + "loss": 0.0355, + "num_input_tokens_seen": 6057728, + "step": 12315 + }, + { + "epoch": 1.625973340372179, + "grad_norm": 0.3471306264400482, + "learning_rate": 1.7067157578282195e-06, + "loss": 0.0661, + "num_input_tokens_seen": 6060288, + "step": 12320 + }, + { + "epoch": 1.6266332321499273, + "grad_norm": 31.32708740234375, + "learning_rate": 1.7063897400297083e-06, + "loss": 0.0912, + "num_input_tokens_seen": 6062656, + "step": 12325 + }, + { + "epoch": 1.627293123927676, + "grad_norm": 0.024469556286931038, + "learning_rate": 1.7060635723050899e-06, + "loss": 0.0712, + "num_input_tokens_seen": 6065088, + "step": 12330 + }, + { + "epoch": 1.6279530157054243, + "grad_norm": 0.04165755584836006, + "learning_rate": 1.705737254723591e-06, + "loss": 0.0004, + "num_input_tokens_seen": 6067392, + "step": 12335 + }, + { + "epoch": 1.6286129074831728, + "grad_norm": 12.173465728759766, + "learning_rate": 1.7054107873544704e-06, + "loss": 0.1991, + "num_input_tokens_seen": 6069760, + "step": 12340 + }, + { + "epoch": 1.6292727992609213, + "grad_norm": 0.069599948823452, + "learning_rate": 1.7050841702670188e-06, + "loss": 0.0003, + "num_input_tokens_seen": 6072256, + "step": 12345 + }, + { + "epoch": 1.6299326910386696, + "grad_norm": 0.08773674815893173, + "learning_rate": 1.7047574035305576e-06, + "loss": 0.0482, + "num_input_tokens_seen": 6074688, + "step": 12350 + }, + { + "epoch": 1.630592582816418, + "grad_norm": 0.009780521504580975, + "learning_rate": 1.704430487214441e-06, + "loss": 0.0002, + "num_input_tokens_seen": 6077184, + "step": 12355 + }, + { + "epoch": 1.6312524745941666, + "grad_norm": 1.1269195079803467, + "learning_rate": 1.7041034213880545e-06, + "loss": 0.001, + "num_input_tokens_seen": 6079424, + "step": 12360 + }, + { + "epoch": 1.631912366371915, + "grad_norm": 0.014932355843484402, + "learning_rate": 1.7037762061208157e-06, + "loss": 0.0385, + "num_input_tokens_seen": 6081920, + "step": 12365 + }, + { + "epoch": 1.6325722581496636, + "grad_norm": 0.028511904180049896, + "learning_rate": 1.7034488414821734e-06, + "loss": 0.115, + "num_input_tokens_seen": 6084352, + "step": 12370 + }, + { + "epoch": 1.6332321499274118, + "grad_norm": 0.017234528437256813, + "learning_rate": 1.7031213275416083e-06, + "loss": 0.0007, + "num_input_tokens_seen": 6086848, + "step": 12375 + }, + { + "epoch": 1.6338920417051603, + "grad_norm": 37.32754898071289, + "learning_rate": 1.702793664368633e-06, + "loss": 0.1038, + "num_input_tokens_seen": 6089344, + "step": 12380 + }, + { + "epoch": 1.6345519334829088, + "grad_norm": 0.03338843956589699, + "learning_rate": 1.702465852032792e-06, + "loss": 0.0722, + "num_input_tokens_seen": 6091840, + "step": 12385 + }, + { + "epoch": 1.6352118252606571, + "grad_norm": 0.08052528649568558, + "learning_rate": 1.7021378906036607e-06, + "loss": 0.0003, + "num_input_tokens_seen": 6094144, + "step": 12390 + }, + { + "epoch": 1.6358717170384058, + "grad_norm": 0.13955703377723694, + "learning_rate": 1.7018097801508467e-06, + "loss": 0.0003, + "num_input_tokens_seen": 6096448, + "step": 12395 + }, + { + "epoch": 1.6365316088161541, + "grad_norm": 0.022091159597039223, + "learning_rate": 1.7014815207439884e-06, + "loss": 0.1521, + "num_input_tokens_seen": 6098816, + "step": 12400 + }, + { + "epoch": 1.6371915005939026, + "grad_norm": 0.06733556091785431, + "learning_rate": 1.7011531124527578e-06, + "loss": 0.1457, + "num_input_tokens_seen": 6101312, + "step": 12405 + }, + { + "epoch": 1.6378513923716511, + "grad_norm": 0.060561731457710266, + "learning_rate": 1.7008245553468559e-06, + "loss": 0.0004, + "num_input_tokens_seen": 6103936, + "step": 12410 + }, + { + "epoch": 1.6385112841493994, + "grad_norm": 0.8273510932922363, + "learning_rate": 1.7004958494960173e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6106624, + "step": 12415 + }, + { + "epoch": 1.639171175927148, + "grad_norm": 13.756531715393066, + "learning_rate": 1.7001669949700073e-06, + "loss": 0.0675, + "num_input_tokens_seen": 6109440, + "step": 12420 + }, + { + "epoch": 1.6398310677048964, + "grad_norm": 0.07977482676506042, + "learning_rate": 1.6998379918386228e-06, + "loss": 0.0088, + "num_input_tokens_seen": 6111680, + "step": 12425 + }, + { + "epoch": 1.640490959482645, + "grad_norm": 20.452110290527344, + "learning_rate": 1.6995088401716922e-06, + "loss": 0.1804, + "num_input_tokens_seen": 6114112, + "step": 12430 + }, + { + "epoch": 1.6411508512603934, + "grad_norm": 19.552038192749023, + "learning_rate": 1.6991795400390756e-06, + "loss": 0.1808, + "num_input_tokens_seen": 6116544, + "step": 12435 + }, + { + "epoch": 1.6418107430381417, + "grad_norm": 0.010304759256541729, + "learning_rate": 1.698850091510665e-06, + "loss": 0.001, + "num_input_tokens_seen": 6119104, + "step": 12440 + }, + { + "epoch": 1.6424706348158902, + "grad_norm": 20.534870147705078, + "learning_rate": 1.6985204946563831e-06, + "loss": 0.1505, + "num_input_tokens_seen": 6121408, + "step": 12445 + }, + { + "epoch": 1.6431305265936387, + "grad_norm": 11.33122444152832, + "learning_rate": 1.6981907495461845e-06, + "loss": 0.0506, + "num_input_tokens_seen": 6124096, + "step": 12450 + }, + { + "epoch": 1.643790418371387, + "grad_norm": 0.34447240829467773, + "learning_rate": 1.697860856250055e-06, + "loss": 0.001, + "num_input_tokens_seen": 6126208, + "step": 12455 + }, + { + "epoch": 1.6444503101491357, + "grad_norm": 25.78012466430664, + "learning_rate": 1.6975308148380125e-06, + "loss": 0.1091, + "num_input_tokens_seen": 6128704, + "step": 12460 + }, + { + "epoch": 1.645110201926884, + "grad_norm": 1.1186546087265015, + "learning_rate": 1.6972006253801055e-06, + "loss": 0.0021, + "num_input_tokens_seen": 6130944, + "step": 12465 + }, + { + "epoch": 1.6457700937046325, + "grad_norm": 391.4671630859375, + "learning_rate": 1.6968702879464148e-06, + "loss": 0.1985, + "num_input_tokens_seen": 6133248, + "step": 12470 + }, + { + "epoch": 1.646429985482381, + "grad_norm": 20.856117248535156, + "learning_rate": 1.6965398026070517e-06, + "loss": 0.2054, + "num_input_tokens_seen": 6135744, + "step": 12475 + }, + { + "epoch": 1.6470898772601292, + "grad_norm": 16.320039749145508, + "learning_rate": 1.6962091694321595e-06, + "loss": 0.0839, + "num_input_tokens_seen": 6138112, + "step": 12480 + }, + { + "epoch": 1.647749769037878, + "grad_norm": 0.07969659566879272, + "learning_rate": 1.6958783884919124e-06, + "loss": 0.1114, + "num_input_tokens_seen": 6140608, + "step": 12485 + }, + { + "epoch": 1.6484096608156262, + "grad_norm": 0.07572974264621735, + "learning_rate": 1.6955474598565163e-06, + "loss": 0.0034, + "num_input_tokens_seen": 6143104, + "step": 12490 + }, + { + "epoch": 1.6490695525933747, + "grad_norm": 0.6827272176742554, + "learning_rate": 1.695216383596209e-06, + "loss": 0.0456, + "num_input_tokens_seen": 6145536, + "step": 12495 + }, + { + "epoch": 1.6497294443711232, + "grad_norm": 0.06942526996135712, + "learning_rate": 1.6948851597812586e-06, + "loss": 0.0014, + "num_input_tokens_seen": 6148096, + "step": 12500 + }, + { + "epoch": 1.6503893361488715, + "grad_norm": 0.541210949420929, + "learning_rate": 1.694553788481965e-06, + "loss": 0.1411, + "num_input_tokens_seen": 6150976, + "step": 12505 + }, + { + "epoch": 1.65104922792662, + "grad_norm": 103.4432601928711, + "learning_rate": 1.6942222697686593e-06, + "loss": 0.1109, + "num_input_tokens_seen": 6153408, + "step": 12510 + }, + { + "epoch": 1.6517091197043685, + "grad_norm": 0.04951288178563118, + "learning_rate": 1.6938906037117039e-06, + "loss": 0.0007, + "num_input_tokens_seen": 6156032, + "step": 12515 + }, + { + "epoch": 1.6523690114821168, + "grad_norm": 0.344783216714859, + "learning_rate": 1.6935587903814926e-06, + "loss": 0.003, + "num_input_tokens_seen": 6158784, + "step": 12520 + }, + { + "epoch": 1.6530289032598655, + "grad_norm": 0.008561319671571255, + "learning_rate": 1.6932268298484508e-06, + "loss": 0.0005, + "num_input_tokens_seen": 6161408, + "step": 12525 + }, + { + "epoch": 1.6536887950376138, + "grad_norm": 16.31315803527832, + "learning_rate": 1.692894722183034e-06, + "loss": 0.0756, + "num_input_tokens_seen": 6163712, + "step": 12530 + }, + { + "epoch": 1.6543486868153623, + "grad_norm": 0.023284809663891792, + "learning_rate": 1.6925624674557298e-06, + "loss": 0.0003, + "num_input_tokens_seen": 6165952, + "step": 12535 + }, + { + "epoch": 1.6550085785931108, + "grad_norm": 0.14583520591259003, + "learning_rate": 1.6922300657370573e-06, + "loss": 0.0918, + "num_input_tokens_seen": 6168192, + "step": 12540 + }, + { + "epoch": 1.655668470370859, + "grad_norm": 0.03759438171982765, + "learning_rate": 1.691897517097566e-06, + "loss": 0.0002, + "num_input_tokens_seen": 6170496, + "step": 12545 + }, + { + "epoch": 1.6563283621486078, + "grad_norm": 0.370606392621994, + "learning_rate": 1.6915648216078374e-06, + "loss": 0.0005, + "num_input_tokens_seen": 6173184, + "step": 12550 + }, + { + "epoch": 1.656988253926356, + "grad_norm": 0.037230778485536575, + "learning_rate": 1.691231979338483e-06, + "loss": 0.0884, + "num_input_tokens_seen": 6175552, + "step": 12555 + }, + { + "epoch": 1.6576481457041046, + "grad_norm": 0.02874135971069336, + "learning_rate": 1.690898990360146e-06, + "loss": 0.1239, + "num_input_tokens_seen": 6178048, + "step": 12560 + }, + { + "epoch": 1.658308037481853, + "grad_norm": 0.015778981149196625, + "learning_rate": 1.690565854743502e-06, + "loss": 0.1286, + "num_input_tokens_seen": 6180544, + "step": 12565 + }, + { + "epoch": 1.6589679292596013, + "grad_norm": 0.028208622708916664, + "learning_rate": 1.690232572559256e-06, + "loss": 0.0003, + "num_input_tokens_seen": 6183040, + "step": 12570 + }, + { + "epoch": 1.6596278210373498, + "grad_norm": 0.011002824641764164, + "learning_rate": 1.6898991438781445e-06, + "loss": 0.1679, + "num_input_tokens_seen": 6185984, + "step": 12575 + }, + { + "epoch": 1.6602877128150983, + "grad_norm": 0.16241048276424408, + "learning_rate": 1.6895655687709356e-06, + "loss": 0.0464, + "num_input_tokens_seen": 6188480, + "step": 12580 + }, + { + "epoch": 1.6609476045928466, + "grad_norm": 97.199951171875, + "learning_rate": 1.6892318473084283e-06, + "loss": 0.071, + "num_input_tokens_seen": 6191104, + "step": 12585 + }, + { + "epoch": 1.6616074963705953, + "grad_norm": 0.041437387466430664, + "learning_rate": 1.6888979795614524e-06, + "loss": 0.404, + "num_input_tokens_seen": 6193664, + "step": 12590 + }, + { + "epoch": 1.6622673881483436, + "grad_norm": 0.7268674373626709, + "learning_rate": 1.688563965600869e-06, + "loss": 0.1, + "num_input_tokens_seen": 6196480, + "step": 12595 + }, + { + "epoch": 1.6629272799260921, + "grad_norm": 11.610241889953613, + "learning_rate": 1.68822980549757e-06, + "loss": 0.1222, + "num_input_tokens_seen": 6199104, + "step": 12600 + }, + { + "epoch": 1.6635871717038406, + "grad_norm": 0.09228473156690598, + "learning_rate": 1.6878954993224786e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6201280, + "step": 12605 + }, + { + "epoch": 1.664247063481589, + "grad_norm": 0.27008455991744995, + "learning_rate": 1.687561047146549e-06, + "loss": 0.0029, + "num_input_tokens_seen": 6203776, + "step": 12610 + }, + { + "epoch": 1.6649069552593376, + "grad_norm": 0.06659281998872757, + "learning_rate": 1.6872264490407656e-06, + "loss": 0.1658, + "num_input_tokens_seen": 6206464, + "step": 12615 + }, + { + "epoch": 1.665566847037086, + "grad_norm": 0.6453077793121338, + "learning_rate": 1.686891705076145e-06, + "loss": 0.003, + "num_input_tokens_seen": 6208896, + "step": 12620 + }, + { + "epoch": 1.6662267388148344, + "grad_norm": 0.07216670364141464, + "learning_rate": 1.6865568153237343e-06, + "loss": 0.061, + "num_input_tokens_seen": 6211136, + "step": 12625 + }, + { + "epoch": 1.666886630592583, + "grad_norm": 30.78990936279297, + "learning_rate": 1.6862217798546115e-06, + "loss": 0.0895, + "num_input_tokens_seen": 6213568, + "step": 12630 + }, + { + "epoch": 1.6675465223703312, + "grad_norm": 18.963054656982422, + "learning_rate": 1.6858865987398847e-06, + "loss": 0.2668, + "num_input_tokens_seen": 6215936, + "step": 12635 + }, + { + "epoch": 1.6682064141480797, + "grad_norm": 0.05866992846131325, + "learning_rate": 1.6855512720506941e-06, + "loss": 0.0006, + "num_input_tokens_seen": 6218560, + "step": 12640 + }, + { + "epoch": 1.6688663059258282, + "grad_norm": 0.7241128087043762, + "learning_rate": 1.6852157998582106e-06, + "loss": 0.1121, + "num_input_tokens_seen": 6221184, + "step": 12645 + }, + { + "epoch": 1.6695261977035765, + "grad_norm": 0.14978912472724915, + "learning_rate": 1.6848801822336355e-06, + "loss": 0.0836, + "num_input_tokens_seen": 6223552, + "step": 12650 + }, + { + "epoch": 1.6701860894813252, + "grad_norm": 0.06732896715402603, + "learning_rate": 1.684544419248201e-06, + "loss": 0.0497, + "num_input_tokens_seen": 6226304, + "step": 12655 + }, + { + "epoch": 1.6708459812590735, + "grad_norm": 0.030299250036478043, + "learning_rate": 1.6842085109731708e-06, + "loss": 0.1245, + "num_input_tokens_seen": 6228864, + "step": 12660 + }, + { + "epoch": 1.671505873036822, + "grad_norm": 0.1021413654088974, + "learning_rate": 1.6838724574798387e-06, + "loss": 0.0763, + "num_input_tokens_seen": 6231552, + "step": 12665 + }, + { + "epoch": 1.6721657648145705, + "grad_norm": 0.10425157845020294, + "learning_rate": 1.6835362588395298e-06, + "loss": 0.0017, + "num_input_tokens_seen": 6233856, + "step": 12670 + }, + { + "epoch": 1.6728256565923187, + "grad_norm": 0.1030372902750969, + "learning_rate": 1.6831999151235995e-06, + "loss": 0.0827, + "num_input_tokens_seen": 6236928, + "step": 12675 + }, + { + "epoch": 1.6734855483700675, + "grad_norm": 19.882171630859375, + "learning_rate": 1.682863426403435e-06, + "loss": 0.0704, + "num_input_tokens_seen": 6239552, + "step": 12680 + }, + { + "epoch": 1.6741454401478157, + "grad_norm": 108.14405822753906, + "learning_rate": 1.682526792750453e-06, + "loss": 0.0288, + "num_input_tokens_seen": 6241920, + "step": 12685 + }, + { + "epoch": 1.6748053319255642, + "grad_norm": 0.06011539697647095, + "learning_rate": 1.6821900142361015e-06, + "loss": 0.2077, + "num_input_tokens_seen": 6244160, + "step": 12690 + }, + { + "epoch": 1.6754652237033127, + "grad_norm": 13.853163719177246, + "learning_rate": 1.6818530909318595e-06, + "loss": 0.2284, + "num_input_tokens_seen": 6246720, + "step": 12695 + }, + { + "epoch": 1.676125115481061, + "grad_norm": 14.427210807800293, + "learning_rate": 1.6815160229092367e-06, + "loss": 0.0043, + "num_input_tokens_seen": 6249088, + "step": 12700 + }, + { + "epoch": 1.6767850072588095, + "grad_norm": 13.329300880432129, + "learning_rate": 1.6811788102397733e-06, + "loss": 0.2497, + "num_input_tokens_seen": 6251456, + "step": 12705 + }, + { + "epoch": 1.677444899036558, + "grad_norm": 0.5812211036682129, + "learning_rate": 1.68084145299504e-06, + "loss": 0.0581, + "num_input_tokens_seen": 6254272, + "step": 12710 + }, + { + "epoch": 1.6781047908143063, + "grad_norm": 0.2836822271347046, + "learning_rate": 1.6805039512466385e-06, + "loss": 0.0853, + "num_input_tokens_seen": 6256704, + "step": 12715 + }, + { + "epoch": 1.678764682592055, + "grad_norm": 0.2207585871219635, + "learning_rate": 1.6801663050662012e-06, + "loss": 0.0476, + "num_input_tokens_seen": 6259072, + "step": 12720 + }, + { + "epoch": 1.6794245743698033, + "grad_norm": 0.47076278924942017, + "learning_rate": 1.6798285145253907e-06, + "loss": 0.0021, + "num_input_tokens_seen": 6261632, + "step": 12725 + }, + { + "epoch": 1.6800844661475518, + "grad_norm": 0.46993520855903625, + "learning_rate": 1.6794905796959017e-06, + "loss": 0.047, + "num_input_tokens_seen": 6264192, + "step": 12730 + }, + { + "epoch": 1.6807443579253003, + "grad_norm": 0.058821726590394974, + "learning_rate": 1.6791525006494572e-06, + "loss": 0.0419, + "num_input_tokens_seen": 6266624, + "step": 12735 + }, + { + "epoch": 1.6814042497030486, + "grad_norm": 0.04777168855071068, + "learning_rate": 1.6788142774578126e-06, + "loss": 0.3103, + "num_input_tokens_seen": 6269056, + "step": 12740 + }, + { + "epoch": 1.6820641414807973, + "grad_norm": 0.05101204663515091, + "learning_rate": 1.678475910192753e-06, + "loss": 0.0926, + "num_input_tokens_seen": 6271296, + "step": 12745 + }, + { + "epoch": 1.6827240332585456, + "grad_norm": 11.261943817138672, + "learning_rate": 1.6781373989260948e-06, + "loss": 0.2126, + "num_input_tokens_seen": 6273600, + "step": 12750 + }, + { + "epoch": 1.683383925036294, + "grad_norm": 23.232006072998047, + "learning_rate": 1.6777987437296842e-06, + "loss": 0.0928, + "num_input_tokens_seen": 6276160, + "step": 12755 + }, + { + "epoch": 1.6840438168140426, + "grad_norm": 0.16996802389621735, + "learning_rate": 1.6774599446753984e-06, + "loss": 0.0657, + "num_input_tokens_seen": 6278720, + "step": 12760 + }, + { + "epoch": 1.6847037085917909, + "grad_norm": 0.07030691206455231, + "learning_rate": 1.6771210018351453e-06, + "loss": 0.2448, + "num_input_tokens_seen": 6281152, + "step": 12765 + }, + { + "epoch": 1.6853636003695394, + "grad_norm": 0.1533779799938202, + "learning_rate": 1.6767819152808627e-06, + "loss": 0.0394, + "num_input_tokens_seen": 6283392, + "step": 12770 + }, + { + "epoch": 1.6860234921472879, + "grad_norm": 16.754724502563477, + "learning_rate": 1.6764426850845194e-06, + "loss": 0.064, + "num_input_tokens_seen": 6285504, + "step": 12775 + }, + { + "epoch": 1.6866833839250361, + "grad_norm": 5.0514678955078125, + "learning_rate": 1.676103311318115e-06, + "loss": 0.306, + "num_input_tokens_seen": 6287936, + "step": 12780 + }, + { + "epoch": 1.6873432757027849, + "grad_norm": 1.2993375062942505, + "learning_rate": 1.6757637940536787e-06, + "loss": 0.0701, + "num_input_tokens_seen": 6290496, + "step": 12785 + }, + { + "epoch": 1.6880031674805331, + "grad_norm": 0.2400226593017578, + "learning_rate": 1.6754241333632705e-06, + "loss": 0.0025, + "num_input_tokens_seen": 6293056, + "step": 12790 + }, + { + "epoch": 1.6886630592582816, + "grad_norm": 0.1520133912563324, + "learning_rate": 1.6750843293189806e-06, + "loss": 0.0408, + "num_input_tokens_seen": 6295488, + "step": 12795 + }, + { + "epoch": 1.6893229510360301, + "grad_norm": 0.32761409878730774, + "learning_rate": 1.674744381992931e-06, + "loss": 0.0907, + "num_input_tokens_seen": 6297856, + "step": 12800 + }, + { + "epoch": 1.6899828428137784, + "grad_norm": 1.6226130723953247, + "learning_rate": 1.674404291457272e-06, + "loss": 0.18, + "num_input_tokens_seen": 6300160, + "step": 12805 + }, + { + "epoch": 1.6906427345915271, + "grad_norm": 13.813915252685547, + "learning_rate": 1.6740640577841862e-06, + "loss": 0.1037, + "num_input_tokens_seen": 6302976, + "step": 12810 + }, + { + "epoch": 1.6913026263692754, + "grad_norm": 0.11801814287900925, + "learning_rate": 1.673723681045885e-06, + "loss": 0.003, + "num_input_tokens_seen": 6305408, + "step": 12815 + }, + { + "epoch": 1.691962518147024, + "grad_norm": 0.07843796163797379, + "learning_rate": 1.6733831613146113e-06, + "loss": 0.001, + "num_input_tokens_seen": 6308160, + "step": 12820 + }, + { + "epoch": 1.6926224099247724, + "grad_norm": 45.090911865234375, + "learning_rate": 1.673042498662638e-06, + "loss": 0.101, + "num_input_tokens_seen": 6310656, + "step": 12825 + }, + { + "epoch": 1.6932823017025207, + "grad_norm": 0.052833192050457, + "learning_rate": 1.672701693162268e-06, + "loss": 0.0167, + "num_input_tokens_seen": 6313024, + "step": 12830 + }, + { + "epoch": 1.6939421934802692, + "grad_norm": 0.3140457570552826, + "learning_rate": 1.672360744885835e-06, + "loss": 0.0606, + "num_input_tokens_seen": 6315584, + "step": 12835 + }, + { + "epoch": 1.6946020852580177, + "grad_norm": 0.043698202818632126, + "learning_rate": 1.6720196539057025e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6318016, + "step": 12840 + }, + { + "epoch": 1.695261977035766, + "grad_norm": 24.684247970581055, + "learning_rate": 1.671678420294265e-06, + "loss": 0.2314, + "num_input_tokens_seen": 6320896, + "step": 12845 + }, + { + "epoch": 1.6959218688135147, + "grad_norm": 17.751066207885742, + "learning_rate": 1.6713370441239469e-06, + "loss": 0.0503, + "num_input_tokens_seen": 6323328, + "step": 12850 + }, + { + "epoch": 1.696581760591263, + "grad_norm": 0.4494774043560028, + "learning_rate": 1.6709955254672026e-06, + "loss": 0.1382, + "num_input_tokens_seen": 6325760, + "step": 12855 + }, + { + "epoch": 1.6972416523690115, + "grad_norm": 0.04193054139614105, + "learning_rate": 1.670653864396517e-06, + "loss": 0.066, + "num_input_tokens_seen": 6328256, + "step": 12860 + }, + { + "epoch": 1.69790154414676, + "grad_norm": 0.06418413668870926, + "learning_rate": 1.670312060984405e-06, + "loss": 0.1217, + "num_input_tokens_seen": 6330816, + "step": 12865 + }, + { + "epoch": 1.6985614359245083, + "grad_norm": 14.279733657836914, + "learning_rate": 1.669970115303412e-06, + "loss": 0.1156, + "num_input_tokens_seen": 6333184, + "step": 12870 + }, + { + "epoch": 1.699221327702257, + "grad_norm": 0.7062100768089294, + "learning_rate": 1.6696280274261137e-06, + "loss": 0.0019, + "num_input_tokens_seen": 6335744, + "step": 12875 + }, + { + "epoch": 1.6998812194800053, + "grad_norm": 15.306600570678711, + "learning_rate": 1.6692857974251156e-06, + "loss": 0.1754, + "num_input_tokens_seen": 6338240, + "step": 12880 + }, + { + "epoch": 1.7005411112577538, + "grad_norm": 0.228725865483284, + "learning_rate": 1.668943425373054e-06, + "loss": 0.0014, + "num_input_tokens_seen": 6340672, + "step": 12885 + }, + { + "epoch": 1.7012010030355023, + "grad_norm": 0.060659151524305344, + "learning_rate": 1.668600911342594e-06, + "loss": 0.0742, + "num_input_tokens_seen": 6343104, + "step": 12890 + }, + { + "epoch": 1.7018608948132505, + "grad_norm": 0.029327716678380966, + "learning_rate": 1.668258255406432e-06, + "loss": 0.0573, + "num_input_tokens_seen": 6345856, + "step": 12895 + }, + { + "epoch": 1.702520786590999, + "grad_norm": 433.2555236816406, + "learning_rate": 1.6679154576372949e-06, + "loss": 0.1826, + "num_input_tokens_seen": 6348224, + "step": 12900 + }, + { + "epoch": 1.7031806783687475, + "grad_norm": 3.740283489227295, + "learning_rate": 1.6675725181079384e-06, + "loss": 0.1554, + "num_input_tokens_seen": 6350784, + "step": 12905 + }, + { + "epoch": 1.7038405701464958, + "grad_norm": 0.37366318702697754, + "learning_rate": 1.6672294368911493e-06, + "loss": 0.0764, + "num_input_tokens_seen": 6353344, + "step": 12910 + }, + { + "epoch": 1.7045004619242445, + "grad_norm": 23.09748077392578, + "learning_rate": 1.6668862140597434e-06, + "loss": 0.0498, + "num_input_tokens_seen": 6355584, + "step": 12915 + }, + { + "epoch": 1.7051603537019928, + "grad_norm": 0.9731279611587524, + "learning_rate": 1.6665428496865684e-06, + "loss": 0.0021, + "num_input_tokens_seen": 6358208, + "step": 12920 + }, + { + "epoch": 1.7058202454797413, + "grad_norm": 0.08989608287811279, + "learning_rate": 1.6661993438445e-06, + "loss": 0.1555, + "num_input_tokens_seen": 6360576, + "step": 12925 + }, + { + "epoch": 1.7064801372574898, + "grad_norm": 11.549619674682617, + "learning_rate": 1.665855696606445e-06, + "loss": 0.0649, + "num_input_tokens_seen": 6362944, + "step": 12930 + }, + { + "epoch": 1.707140029035238, + "grad_norm": 14.804794311523438, + "learning_rate": 1.6655119080453402e-06, + "loss": 0.1484, + "num_input_tokens_seen": 6365440, + "step": 12935 + }, + { + "epoch": 1.7077999208129868, + "grad_norm": 0.05886728689074516, + "learning_rate": 1.6651679782341524e-06, + "loss": 0.1357, + "num_input_tokens_seen": 6367808, + "step": 12940 + }, + { + "epoch": 1.708459812590735, + "grad_norm": 8.854753494262695, + "learning_rate": 1.6648239072458777e-06, + "loss": 0.1377, + "num_input_tokens_seen": 6370304, + "step": 12945 + }, + { + "epoch": 1.7091197043684836, + "grad_norm": 1.3267241716384888, + "learning_rate": 1.6644796951535432e-06, + "loss": 0.0023, + "num_input_tokens_seen": 6372544, + "step": 12950 + }, + { + "epoch": 1.709779596146232, + "grad_norm": 0.04999571666121483, + "learning_rate": 1.664135342030205e-06, + "loss": 0.0224, + "num_input_tokens_seen": 6375232, + "step": 12955 + }, + { + "epoch": 1.7104394879239804, + "grad_norm": 0.20558416843414307, + "learning_rate": 1.6637908479489496e-06, + "loss": 0.0619, + "num_input_tokens_seen": 6377664, + "step": 12960 + }, + { + "epoch": 1.7110993797017289, + "grad_norm": 14.066550254821777, + "learning_rate": 1.6634462129828938e-06, + "loss": 0.1494, + "num_input_tokens_seen": 6380032, + "step": 12965 + }, + { + "epoch": 1.7117592714794774, + "grad_norm": 0.4319327771663666, + "learning_rate": 1.6631014372051836e-06, + "loss": 0.0607, + "num_input_tokens_seen": 6382464, + "step": 12970 + }, + { + "epoch": 1.7124191632572257, + "grad_norm": 0.3865269124507904, + "learning_rate": 1.6627565206889953e-06, + "loss": 0.1611, + "num_input_tokens_seen": 6384512, + "step": 12975 + }, + { + "epoch": 1.7130790550349744, + "grad_norm": 0.09151875227689743, + "learning_rate": 1.6624114635075344e-06, + "loss": 0.0027, + "num_input_tokens_seen": 6387072, + "step": 12980 + }, + { + "epoch": 1.7137389468127227, + "grad_norm": 0.26752570271492004, + "learning_rate": 1.6620662657340371e-06, + "loss": 0.0497, + "num_input_tokens_seen": 6389696, + "step": 12985 + }, + { + "epoch": 1.7143988385904712, + "grad_norm": 0.33701422810554504, + "learning_rate": 1.66172092744177e-06, + "loss": 0.0436, + "num_input_tokens_seen": 6392064, + "step": 12990 + }, + { + "epoch": 1.7150587303682197, + "grad_norm": 0.3485437035560608, + "learning_rate": 1.661375448704027e-06, + "loss": 0.0485, + "num_input_tokens_seen": 6394816, + "step": 12995 + }, + { + "epoch": 1.715718622145968, + "grad_norm": 13.659415245056152, + "learning_rate": 1.6610298295941347e-06, + "loss": 0.1353, + "num_input_tokens_seen": 6397376, + "step": 13000 + }, + { + "epoch": 1.7163785139237167, + "grad_norm": 71.0516128540039, + "learning_rate": 1.6606840701854476e-06, + "loss": 0.1308, + "num_input_tokens_seen": 6399936, + "step": 13005 + }, + { + "epoch": 1.717038405701465, + "grad_norm": 15.572142601013184, + "learning_rate": 1.660338170551351e-06, + "loss": 0.1169, + "num_input_tokens_seen": 6402560, + "step": 13010 + }, + { + "epoch": 1.7176982974792134, + "grad_norm": 39.39515686035156, + "learning_rate": 1.6599921307652598e-06, + "loss": 0.081, + "num_input_tokens_seen": 6404928, + "step": 13015 + }, + { + "epoch": 1.718358189256962, + "grad_norm": 0.8864211440086365, + "learning_rate": 1.659645950900618e-06, + "loss": 0.0065, + "num_input_tokens_seen": 6407552, + "step": 13020 + }, + { + "epoch": 1.7190180810347102, + "grad_norm": 0.14865431189537048, + "learning_rate": 1.6592996310308997e-06, + "loss": 0.1286, + "num_input_tokens_seen": 6410240, + "step": 13025 + }, + { + "epoch": 1.7196779728124587, + "grad_norm": 10.887186050415039, + "learning_rate": 1.658953171229609e-06, + "loss": 0.1372, + "num_input_tokens_seen": 6412480, + "step": 13030 + }, + { + "epoch": 1.7203378645902072, + "grad_norm": 31.245393753051758, + "learning_rate": 1.6586065715702797e-06, + "loss": 0.0805, + "num_input_tokens_seen": 6415104, + "step": 13035 + }, + { + "epoch": 1.7209977563679555, + "grad_norm": 0.3583381175994873, + "learning_rate": 1.658259832126475e-06, + "loss": 0.04, + "num_input_tokens_seen": 6417728, + "step": 13040 + }, + { + "epoch": 1.7216576481457042, + "grad_norm": 14.247611999511719, + "learning_rate": 1.6579129529717872e-06, + "loss": 0.1079, + "num_input_tokens_seen": 6420544, + "step": 13045 + }, + { + "epoch": 1.7223175399234525, + "grad_norm": 0.24372056126594543, + "learning_rate": 1.6575659341798396e-06, + "loss": 0.0819, + "num_input_tokens_seen": 6422976, + "step": 13050 + }, + { + "epoch": 1.722977431701201, + "grad_norm": 0.5238776206970215, + "learning_rate": 1.6572187758242842e-06, + "loss": 0.1217, + "num_input_tokens_seen": 6425216, + "step": 13055 + }, + { + "epoch": 1.7236373234789495, + "grad_norm": 0.1704397052526474, + "learning_rate": 1.6568714779788024e-06, + "loss": 0.3001, + "num_input_tokens_seen": 6427456, + "step": 13060 + }, + { + "epoch": 1.7242972152566978, + "grad_norm": 0.049737852066755295, + "learning_rate": 1.6565240407171067e-06, + "loss": 0.0018, + "num_input_tokens_seen": 6429824, + "step": 13065 + }, + { + "epoch": 1.7249571070344465, + "grad_norm": 14.743538856506348, + "learning_rate": 1.6561764641129371e-06, + "loss": 0.1615, + "num_input_tokens_seen": 6432128, + "step": 13070 + }, + { + "epoch": 1.7256169988121948, + "grad_norm": 0.7518868446350098, + "learning_rate": 1.655828748240065e-06, + "loss": 0.1621, + "num_input_tokens_seen": 6434560, + "step": 13075 + }, + { + "epoch": 1.7262768905899433, + "grad_norm": 0.31233370304107666, + "learning_rate": 1.6554808931722902e-06, + "loss": 0.0018, + "num_input_tokens_seen": 6437120, + "step": 13080 + }, + { + "epoch": 1.7269367823676918, + "grad_norm": 28.600543975830078, + "learning_rate": 1.6551328989834423e-06, + "loss": 0.113, + "num_input_tokens_seen": 6439616, + "step": 13085 + }, + { + "epoch": 1.72759667414544, + "grad_norm": 0.4299144744873047, + "learning_rate": 1.6547847657473805e-06, + "loss": 0.0015, + "num_input_tokens_seen": 6442240, + "step": 13090 + }, + { + "epoch": 1.7282565659231885, + "grad_norm": 0.061676811426877975, + "learning_rate": 1.654436493537994e-06, + "loss": 0.0015, + "num_input_tokens_seen": 6444864, + "step": 13095 + }, + { + "epoch": 1.728916457700937, + "grad_norm": 0.16284187138080597, + "learning_rate": 1.6540880824292008e-06, + "loss": 0.0301, + "num_input_tokens_seen": 6447296, + "step": 13100 + }, + { + "epoch": 1.7295763494786855, + "grad_norm": 0.05911894142627716, + "learning_rate": 1.6537395324949489e-06, + "loss": 0.1013, + "num_input_tokens_seen": 6449408, + "step": 13105 + }, + { + "epoch": 1.730236241256434, + "grad_norm": 0.01173966471105814, + "learning_rate": 1.6533908438092149e-06, + "loss": 0.0005, + "num_input_tokens_seen": 6451968, + "step": 13110 + }, + { + "epoch": 1.7308961330341823, + "grad_norm": 0.08755994588136673, + "learning_rate": 1.6530420164460055e-06, + "loss": 0.0571, + "num_input_tokens_seen": 6454272, + "step": 13115 + }, + { + "epoch": 1.7315560248119308, + "grad_norm": 27.81377410888672, + "learning_rate": 1.6526930504793576e-06, + "loss": 0.0762, + "num_input_tokens_seen": 6456640, + "step": 13120 + }, + { + "epoch": 1.7322159165896793, + "grad_norm": 0.029269201681017876, + "learning_rate": 1.6523439459833357e-06, + "loss": 0.001, + "num_input_tokens_seen": 6459136, + "step": 13125 + }, + { + "epoch": 1.7328758083674276, + "grad_norm": 41.80912780761719, + "learning_rate": 1.6519947030320356e-06, + "loss": 0.0997, + "num_input_tokens_seen": 6461376, + "step": 13130 + }, + { + "epoch": 1.7335357001451763, + "grad_norm": 0.032881107181310654, + "learning_rate": 1.651645321699581e-06, + "loss": 0.0785, + "num_input_tokens_seen": 6463936, + "step": 13135 + }, + { + "epoch": 1.7341955919229246, + "grad_norm": 17.294940948486328, + "learning_rate": 1.6512958020601256e-06, + "loss": 0.0607, + "num_input_tokens_seen": 6466432, + "step": 13140 + }, + { + "epoch": 1.734855483700673, + "grad_norm": 4.170657634735107, + "learning_rate": 1.6509461441878527e-06, + "loss": 0.0482, + "num_input_tokens_seen": 6468800, + "step": 13145 + }, + { + "epoch": 1.7355153754784216, + "grad_norm": 4.368320941925049, + "learning_rate": 1.6505963481569745e-06, + "loss": 0.0163, + "num_input_tokens_seen": 6471360, + "step": 13150 + }, + { + "epoch": 1.7361752672561699, + "grad_norm": 0.008847353979945183, + "learning_rate": 1.6502464140417326e-06, + "loss": 0.0005, + "num_input_tokens_seen": 6473856, + "step": 13155 + }, + { + "epoch": 1.7368351590339184, + "grad_norm": 204.90426635742188, + "learning_rate": 1.6498963419163978e-06, + "loss": 0.2147, + "num_input_tokens_seen": 6476288, + "step": 13160 + }, + { + "epoch": 1.7374950508116669, + "grad_norm": 0.08478450030088425, + "learning_rate": 1.6495461318552708e-06, + "loss": 0.1285, + "num_input_tokens_seen": 6478528, + "step": 13165 + }, + { + "epoch": 1.7381549425894154, + "grad_norm": 27.09694480895996, + "learning_rate": 1.6491957839326812e-06, + "loss": 0.0915, + "num_input_tokens_seen": 6481024, + "step": 13170 + }, + { + "epoch": 1.7388148343671639, + "grad_norm": 201.52137756347656, + "learning_rate": 1.6488452982229873e-06, + "loss": 0.0941, + "num_input_tokens_seen": 6483136, + "step": 13175 + }, + { + "epoch": 1.7394747261449122, + "grad_norm": 0.0733003094792366, + "learning_rate": 1.6484946748005773e-06, + "loss": 0.127, + "num_input_tokens_seen": 6485824, + "step": 13180 + }, + { + "epoch": 1.7401346179226607, + "grad_norm": 0.0202048197388649, + "learning_rate": 1.6481439137398688e-06, + "loss": 0.0833, + "num_input_tokens_seen": 6488384, + "step": 13185 + }, + { + "epoch": 1.7407945097004092, + "grad_norm": 0.060185838490724564, + "learning_rate": 1.6477930151153078e-06, + "loss": 0.0005, + "num_input_tokens_seen": 6491072, + "step": 13190 + }, + { + "epoch": 1.7414544014781574, + "grad_norm": 11.041423797607422, + "learning_rate": 1.6474419790013707e-06, + "loss": 0.0576, + "num_input_tokens_seen": 6493568, + "step": 13195 + }, + { + "epoch": 1.7421142932559062, + "grad_norm": 23.28339385986328, + "learning_rate": 1.6470908054725617e-06, + "loss": 0.0509, + "num_input_tokens_seen": 6496320, + "step": 13200 + }, + { + "epoch": 1.7427741850336544, + "grad_norm": 0.029013272374868393, + "learning_rate": 1.6467394946034152e-06, + "loss": 0.0011, + "num_input_tokens_seen": 6498560, + "step": 13205 + }, + { + "epoch": 1.743434076811403, + "grad_norm": 0.33198022842407227, + "learning_rate": 1.6463880464684942e-06, + "loss": 0.0763, + "num_input_tokens_seen": 6500928, + "step": 13210 + }, + { + "epoch": 1.7440939685891514, + "grad_norm": 15.056183815002441, + "learning_rate": 1.6460364611423911e-06, + "loss": 0.069, + "num_input_tokens_seen": 6503424, + "step": 13215 + }, + { + "epoch": 1.7447538603668997, + "grad_norm": 10.846746444702148, + "learning_rate": 1.6456847386997277e-06, + "loss": 0.1996, + "num_input_tokens_seen": 6505792, + "step": 13220 + }, + { + "epoch": 1.7454137521446482, + "grad_norm": 0.034079235047101974, + "learning_rate": 1.6453328792151537e-06, + "loss": 0.0445, + "num_input_tokens_seen": 6508160, + "step": 13225 + }, + { + "epoch": 1.7460736439223967, + "grad_norm": 0.034832458943128586, + "learning_rate": 1.6449808827633497e-06, + "loss": 0.0011, + "num_input_tokens_seen": 6510976, + "step": 13230 + }, + { + "epoch": 1.7467335357001452, + "grad_norm": 0.38657528162002563, + "learning_rate": 1.6446287494190237e-06, + "loss": 0.0272, + "num_input_tokens_seen": 6513216, + "step": 13235 + }, + { + "epoch": 1.7473934274778937, + "grad_norm": 58.905696868896484, + "learning_rate": 1.6442764792569136e-06, + "loss": 0.0498, + "num_input_tokens_seen": 6515904, + "step": 13240 + }, + { + "epoch": 1.748053319255642, + "grad_norm": 13.200113296508789, + "learning_rate": 1.6439240723517862e-06, + "loss": 0.1935, + "num_input_tokens_seen": 6518528, + "step": 13245 + }, + { + "epoch": 1.7487132110333905, + "grad_norm": 0.960753321647644, + "learning_rate": 1.6435715287784375e-06, + "loss": 0.0038, + "num_input_tokens_seen": 6520960, + "step": 13250 + }, + { + "epoch": 1.749373102811139, + "grad_norm": 0.26324328780174255, + "learning_rate": 1.643218848611692e-06, + "loss": 0.0558, + "num_input_tokens_seen": 6523520, + "step": 13255 + }, + { + "epoch": 1.7500329945888873, + "grad_norm": 38.99361038208008, + "learning_rate": 1.642866031926404e-06, + "loss": 0.2054, + "num_input_tokens_seen": 6526080, + "step": 13260 + }, + { + "epoch": 1.750692886366636, + "grad_norm": 0.039592694491147995, + "learning_rate": 1.6425130787974558e-06, + "loss": 0.342, + "num_input_tokens_seen": 6528448, + "step": 13265 + }, + { + "epoch": 1.750692886366636, + "eval_loss": 0.11515690386295319, + "eval_runtime": 7.8352, + "eval_samples_per_second": 859.579, + "eval_steps_per_second": 107.463, + "num_input_tokens_seen": 6528448, + "step": 13265 + }, + { + "epoch": 1.7513527781443843, + "grad_norm": 0.28292742371559143, + "learning_rate": 1.6421599892997596e-06, + "loss": 0.0822, + "num_input_tokens_seen": 6531136, + "step": 13270 + }, + { + "epoch": 1.7520126699221328, + "grad_norm": 0.0518592968583107, + "learning_rate": 1.6418067635082555e-06, + "loss": 0.2525, + "num_input_tokens_seen": 6533824, + "step": 13275 + }, + { + "epoch": 1.7526725616998813, + "grad_norm": 22.125919342041016, + "learning_rate": 1.6414534014979138e-06, + "loss": 0.1643, + "num_input_tokens_seen": 6536256, + "step": 13280 + }, + { + "epoch": 1.7533324534776296, + "grad_norm": 0.2832372188568115, + "learning_rate": 1.6410999033437323e-06, + "loss": 0.0185, + "num_input_tokens_seen": 6538688, + "step": 13285 + }, + { + "epoch": 1.7539923452553783, + "grad_norm": 0.6796748042106628, + "learning_rate": 1.640746269120739e-06, + "loss": 0.0024, + "num_input_tokens_seen": 6541376, + "step": 13290 + }, + { + "epoch": 1.7546522370331266, + "grad_norm": 0.2318210005760193, + "learning_rate": 1.6403924989039899e-06, + "loss": 0.001, + "num_input_tokens_seen": 6543744, + "step": 13295 + }, + { + "epoch": 1.755312128810875, + "grad_norm": 15.547673225402832, + "learning_rate": 1.6400385927685706e-06, + "loss": 0.0786, + "num_input_tokens_seen": 6546112, + "step": 13300 + }, + { + "epoch": 1.7559720205886236, + "grad_norm": 0.2586086392402649, + "learning_rate": 1.6396845507895942e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6548544, + "step": 13305 + }, + { + "epoch": 1.7566319123663718, + "grad_norm": 0.3337395191192627, + "learning_rate": 1.6393303730422046e-06, + "loss": 0.0397, + "num_input_tokens_seen": 6550976, + "step": 13310 + }, + { + "epoch": 1.7572918041441203, + "grad_norm": 0.03228778764605522, + "learning_rate": 1.6389760596015727e-06, + "loss": 0.027, + "num_input_tokens_seen": 6553536, + "step": 13315 + }, + { + "epoch": 1.7579516959218688, + "grad_norm": 15.461897850036621, + "learning_rate": 1.6386216105428993e-06, + "loss": 0.0348, + "num_input_tokens_seen": 6556160, + "step": 13320 + }, + { + "epoch": 1.7586115876996171, + "grad_norm": 0.05416465923190117, + "learning_rate": 1.6382670259414138e-06, + "loss": 0.0428, + "num_input_tokens_seen": 6558336, + "step": 13325 + }, + { + "epoch": 1.7592714794773658, + "grad_norm": 3.9760372638702393, + "learning_rate": 1.637912305872374e-06, + "loss": 0.1086, + "num_input_tokens_seen": 6560960, + "step": 13330 + }, + { + "epoch": 1.7599313712551141, + "grad_norm": 1.265533208847046, + "learning_rate": 1.6375574504110664e-06, + "loss": 0.0529, + "num_input_tokens_seen": 6563200, + "step": 13335 + }, + { + "epoch": 1.7605912630328626, + "grad_norm": 0.09937909245491028, + "learning_rate": 1.637202459632807e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6566144, + "step": 13340 + }, + { + "epoch": 1.7612511548106111, + "grad_norm": 0.016885558143258095, + "learning_rate": 1.6368473336129395e-06, + "loss": 0.1122, + "num_input_tokens_seen": 6568512, + "step": 13345 + }, + { + "epoch": 1.7619110465883594, + "grad_norm": 30.21272087097168, + "learning_rate": 1.6364920724268374e-06, + "loss": 0.1335, + "num_input_tokens_seen": 6570688, + "step": 13350 + }, + { + "epoch": 1.7625709383661081, + "grad_norm": 0.06372368335723877, + "learning_rate": 1.6361366761499023e-06, + "loss": 0.2444, + "num_input_tokens_seen": 6573376, + "step": 13355 + }, + { + "epoch": 1.7632308301438564, + "grad_norm": 0.15929684042930603, + "learning_rate": 1.6357811448575638e-06, + "loss": 0.0877, + "num_input_tokens_seen": 6575488, + "step": 13360 + }, + { + "epoch": 1.763890721921605, + "grad_norm": 0.1368577927350998, + "learning_rate": 1.6354254786252813e-06, + "loss": 0.0697, + "num_input_tokens_seen": 6577856, + "step": 13365 + }, + { + "epoch": 1.7645506136993534, + "grad_norm": 0.12535516917705536, + "learning_rate": 1.6350696775285425e-06, + "loss": 0.0228, + "num_input_tokens_seen": 6580096, + "step": 13370 + }, + { + "epoch": 1.7652105054771017, + "grad_norm": 7.772195339202881, + "learning_rate": 1.6347137416428637e-06, + "loss": 0.0148, + "num_input_tokens_seen": 6582592, + "step": 13375 + }, + { + "epoch": 1.7658703972548502, + "grad_norm": 0.09801344573497772, + "learning_rate": 1.634357671043789e-06, + "loss": 0.1398, + "num_input_tokens_seen": 6585216, + "step": 13380 + }, + { + "epoch": 1.7665302890325987, + "grad_norm": 0.25059768557548523, + "learning_rate": 1.6340014658068923e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6588032, + "step": 13385 + }, + { + "epoch": 1.767190180810347, + "grad_norm": 0.02534446120262146, + "learning_rate": 1.6336451260077757e-06, + "loss": 0.0724, + "num_input_tokens_seen": 6590528, + "step": 13390 + }, + { + "epoch": 1.7678500725880957, + "grad_norm": 0.018427465111017227, + "learning_rate": 1.6332886517220694e-06, + "loss": 0.1463, + "num_input_tokens_seen": 6592960, + "step": 13395 + }, + { + "epoch": 1.768509964365844, + "grad_norm": 0.08824898302555084, + "learning_rate": 1.632932043025433e-06, + "loss": 0.0916, + "num_input_tokens_seen": 6595136, + "step": 13400 + }, + { + "epoch": 1.7691698561435925, + "grad_norm": 47.151695251464844, + "learning_rate": 1.6325752999935539e-06, + "loss": 0.2149, + "num_input_tokens_seen": 6597824, + "step": 13405 + }, + { + "epoch": 1.769829747921341, + "grad_norm": 2.347273349761963, + "learning_rate": 1.6322184227021479e-06, + "loss": 0.0036, + "num_input_tokens_seen": 6600128, + "step": 13410 + }, + { + "epoch": 1.7704896396990892, + "grad_norm": 0.21026629209518433, + "learning_rate": 1.6318614112269598e-06, + "loss": 0.0014, + "num_input_tokens_seen": 6602944, + "step": 13415 + }, + { + "epoch": 1.771149531476838, + "grad_norm": 0.20188181102275848, + "learning_rate": 1.631504265643763e-06, + "loss": 0.1428, + "num_input_tokens_seen": 6605184, + "step": 13420 + }, + { + "epoch": 1.7718094232545862, + "grad_norm": 0.3236222267150879, + "learning_rate": 1.6311469860283584e-06, + "loss": 0.0775, + "num_input_tokens_seen": 6607680, + "step": 13425 + }, + { + "epoch": 1.7724693150323347, + "grad_norm": 0.04968617111444473, + "learning_rate": 1.6307895724565768e-06, + "loss": 0.0653, + "num_input_tokens_seen": 6610048, + "step": 13430 + }, + { + "epoch": 1.7731292068100832, + "grad_norm": 17.058731079101562, + "learning_rate": 1.6304320250042761e-06, + "loss": 0.0665, + "num_input_tokens_seen": 6612480, + "step": 13435 + }, + { + "epoch": 1.7737890985878315, + "grad_norm": 0.0495927557349205, + "learning_rate": 1.6300743437473434e-06, + "loss": 0.0553, + "num_input_tokens_seen": 6614784, + "step": 13440 + }, + { + "epoch": 1.77444899036558, + "grad_norm": 0.4401050806045532, + "learning_rate": 1.6297165287616936e-06, + "loss": 0.0466, + "num_input_tokens_seen": 6617152, + "step": 13445 + }, + { + "epoch": 1.7751088821433285, + "grad_norm": 18.053531646728516, + "learning_rate": 1.629358580123271e-06, + "loss": 0.2752, + "num_input_tokens_seen": 6619648, + "step": 13450 + }, + { + "epoch": 1.7757687739210768, + "grad_norm": 4.440788745880127, + "learning_rate": 1.6290004979080473e-06, + "loss": 0.0518, + "num_input_tokens_seen": 6622016, + "step": 13455 + }, + { + "epoch": 1.7764286656988255, + "grad_norm": 0.09185050427913666, + "learning_rate": 1.6286422821920222e-06, + "loss": 0.1611, + "num_input_tokens_seen": 6624384, + "step": 13460 + }, + { + "epoch": 1.7770885574765738, + "grad_norm": 0.1410074383020401, + "learning_rate": 1.6282839330512252e-06, + "loss": 0.1648, + "num_input_tokens_seen": 6626880, + "step": 13465 + }, + { + "epoch": 1.7777484492543223, + "grad_norm": 118.95601654052734, + "learning_rate": 1.6279254505617134e-06, + "loss": 0.0707, + "num_input_tokens_seen": 6629760, + "step": 13470 + }, + { + "epoch": 1.7784083410320708, + "grad_norm": 0.0755147859454155, + "learning_rate": 1.6275668347995714e-06, + "loss": 0.0006, + "num_input_tokens_seen": 6632576, + "step": 13475 + }, + { + "epoch": 1.779068232809819, + "grad_norm": 0.015661204233765602, + "learning_rate": 1.6272080858409138e-06, + "loss": 0.0429, + "num_input_tokens_seen": 6635136, + "step": 13480 + }, + { + "epoch": 1.7797281245875678, + "grad_norm": 0.10322346538305283, + "learning_rate": 1.6268492037618815e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6637440, + "step": 13485 + }, + { + "epoch": 1.780388016365316, + "grad_norm": 0.12164479494094849, + "learning_rate": 1.6264901886386448e-06, + "loss": 0.0592, + "num_input_tokens_seen": 6640320, + "step": 13490 + }, + { + "epoch": 1.7810479081430646, + "grad_norm": 0.055566031485795975, + "learning_rate": 1.6261310405474022e-06, + "loss": 0.1364, + "num_input_tokens_seen": 6643136, + "step": 13495 + }, + { + "epoch": 1.781707799920813, + "grad_norm": 80.7416763305664, + "learning_rate": 1.6257717595643807e-06, + "loss": 0.0181, + "num_input_tokens_seen": 6645568, + "step": 13500 + }, + { + "epoch": 1.7823676916985614, + "grad_norm": 84.49398803710938, + "learning_rate": 1.6254123457658346e-06, + "loss": 0.1195, + "num_input_tokens_seen": 6648128, + "step": 13505 + }, + { + "epoch": 1.7830275834763099, + "grad_norm": 0.12812452018260956, + "learning_rate": 1.625052799228047e-06, + "loss": 0.0976, + "num_input_tokens_seen": 6650880, + "step": 13510 + }, + { + "epoch": 1.7836874752540584, + "grad_norm": 24.594783782958984, + "learning_rate": 1.624693120027329e-06, + "loss": 0.0987, + "num_input_tokens_seen": 6653504, + "step": 13515 + }, + { + "epoch": 1.7843473670318066, + "grad_norm": 0.302664190530777, + "learning_rate": 1.6243333082400197e-06, + "loss": 0.0598, + "num_input_tokens_seen": 6655936, + "step": 13520 + }, + { + "epoch": 1.7850072588095554, + "grad_norm": 0.029307713732123375, + "learning_rate": 1.623973363942487e-06, + "loss": 0.0493, + "num_input_tokens_seen": 6658432, + "step": 13525 + }, + { + "epoch": 1.7856671505873036, + "grad_norm": 0.23820511996746063, + "learning_rate": 1.6236132872111266e-06, + "loss": 0.0007, + "num_input_tokens_seen": 6660800, + "step": 13530 + }, + { + "epoch": 1.7863270423650521, + "grad_norm": 0.1212558001279831, + "learning_rate": 1.6232530781223613e-06, + "loss": 0.0938, + "num_input_tokens_seen": 6663232, + "step": 13535 + }, + { + "epoch": 1.7869869341428006, + "grad_norm": 21.21772575378418, + "learning_rate": 1.6228927367526437e-06, + "loss": 0.1012, + "num_input_tokens_seen": 6665792, + "step": 13540 + }, + { + "epoch": 1.787646825920549, + "grad_norm": 0.670257568359375, + "learning_rate": 1.6225322631784533e-06, + "loss": 0.0525, + "num_input_tokens_seen": 6668352, + "step": 13545 + }, + { + "epoch": 1.7883067176982976, + "grad_norm": 0.9238318204879761, + "learning_rate": 1.622171657476298e-06, + "loss": 0.0015, + "num_input_tokens_seen": 6670976, + "step": 13550 + }, + { + "epoch": 1.788966609476046, + "grad_norm": 0.06575516611337662, + "learning_rate": 1.621810919722714e-06, + "loss": 0.0724, + "num_input_tokens_seen": 6673472, + "step": 13555 + }, + { + "epoch": 1.7896265012537944, + "grad_norm": 0.31715166568756104, + "learning_rate": 1.6214500499942649e-06, + "loss": 0.0431, + "num_input_tokens_seen": 6675904, + "step": 13560 + }, + { + "epoch": 1.790286393031543, + "grad_norm": 0.0651044175028801, + "learning_rate": 1.6210890483675427e-06, + "loss": 0.1878, + "num_input_tokens_seen": 6678208, + "step": 13565 + }, + { + "epoch": 1.7909462848092912, + "grad_norm": 0.027278663590550423, + "learning_rate": 1.620727914919168e-06, + "loss": 0.0459, + "num_input_tokens_seen": 6680704, + "step": 13570 + }, + { + "epoch": 1.7916061765870397, + "grad_norm": 0.015751399099826813, + "learning_rate": 1.620366649725788e-06, + "loss": 0.0964, + "num_input_tokens_seen": 6683136, + "step": 13575 + }, + { + "epoch": 1.7922660683647882, + "grad_norm": 22.279233932495117, + "learning_rate": 1.6200052528640792e-06, + "loss": 0.0529, + "num_input_tokens_seen": 6685632, + "step": 13580 + }, + { + "epoch": 1.7929259601425365, + "grad_norm": 12.878225326538086, + "learning_rate": 1.619643724410745e-06, + "loss": 0.1235, + "num_input_tokens_seen": 6687872, + "step": 13585 + }, + { + "epoch": 1.7935858519202852, + "grad_norm": 0.34992167353630066, + "learning_rate": 1.6192820644425176e-06, + "loss": 0.002, + "num_input_tokens_seen": 6690368, + "step": 13590 + }, + { + "epoch": 1.7942457436980335, + "grad_norm": 17.81177520751953, + "learning_rate": 1.6189202730361563e-06, + "loss": 0.0692, + "num_input_tokens_seen": 6692992, + "step": 13595 + }, + { + "epoch": 1.794905635475782, + "grad_norm": 135.81553649902344, + "learning_rate": 1.618558350268449e-06, + "loss": 0.0568, + "num_input_tokens_seen": 6695168, + "step": 13600 + }, + { + "epoch": 1.7955655272535305, + "grad_norm": 0.016782505437731743, + "learning_rate": 1.618196296216211e-06, + "loss": 0.1914, + "num_input_tokens_seen": 6697536, + "step": 13605 + }, + { + "epoch": 1.7962254190312787, + "grad_norm": 0.269696980714798, + "learning_rate": 1.6178341109562859e-06, + "loss": 0.073, + "num_input_tokens_seen": 6700288, + "step": 13610 + }, + { + "epoch": 1.7968853108090275, + "grad_norm": 3.4058849811553955, + "learning_rate": 1.6174717945655446e-06, + "loss": 0.0581, + "num_input_tokens_seen": 6702912, + "step": 13615 + }, + { + "epoch": 1.7975452025867757, + "grad_norm": 0.19131074845790863, + "learning_rate": 1.6171093471208863e-06, + "loss": 0.083, + "num_input_tokens_seen": 6705408, + "step": 13620 + }, + { + "epoch": 1.7982050943645242, + "grad_norm": 0.05940356105566025, + "learning_rate": 1.616746768699238e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6707904, + "step": 13625 + }, + { + "epoch": 1.7988649861422727, + "grad_norm": 18.854990005493164, + "learning_rate": 1.616384059377554e-06, + "loss": 0.0738, + "num_input_tokens_seen": 6710400, + "step": 13630 + }, + { + "epoch": 1.799524877920021, + "grad_norm": 0.020753202959895134, + "learning_rate": 1.616021219232817e-06, + "loss": 0.0009, + "num_input_tokens_seen": 6712896, + "step": 13635 + }, + { + "epoch": 1.8001847696977695, + "grad_norm": 0.0038060767110437155, + "learning_rate": 1.6156582483420374e-06, + "loss": 0.0371, + "num_input_tokens_seen": 6715520, + "step": 13640 + }, + { + "epoch": 1.800844661475518, + "grad_norm": 0.3243776857852936, + "learning_rate": 1.6152951467822523e-06, + "loss": 0.1065, + "num_input_tokens_seen": 6718080, + "step": 13645 + }, + { + "epoch": 1.8015045532532663, + "grad_norm": 54.605873107910156, + "learning_rate": 1.614931914630528e-06, + "loss": 0.1254, + "num_input_tokens_seen": 6720320, + "step": 13650 + }, + { + "epoch": 1.802164445031015, + "grad_norm": 66.58609771728516, + "learning_rate": 1.6145685519639577e-06, + "loss": 0.0896, + "num_input_tokens_seen": 6722816, + "step": 13655 + }, + { + "epoch": 1.8028243368087633, + "grad_norm": 0.15989159047603607, + "learning_rate": 1.6142050588596631e-06, + "loss": 0.0551, + "num_input_tokens_seen": 6725120, + "step": 13660 + }, + { + "epoch": 1.8034842285865118, + "grad_norm": 8.184621810913086, + "learning_rate": 1.6138414353947923e-06, + "loss": 0.0597, + "num_input_tokens_seen": 6727616, + "step": 13665 + }, + { + "epoch": 1.8041441203642603, + "grad_norm": 0.01618696190416813, + "learning_rate": 1.613477681646522e-06, + "loss": 0.0805, + "num_input_tokens_seen": 6730240, + "step": 13670 + }, + { + "epoch": 1.8048040121420086, + "grad_norm": 17.110807418823242, + "learning_rate": 1.6131137976920556e-06, + "loss": 0.0583, + "num_input_tokens_seen": 6732608, + "step": 13675 + }, + { + "epoch": 1.8054639039197573, + "grad_norm": 15.315784454345703, + "learning_rate": 1.612749783608626e-06, + "loss": 0.1257, + "num_input_tokens_seen": 6735168, + "step": 13680 + }, + { + "epoch": 1.8061237956975056, + "grad_norm": 0.023321373388171196, + "learning_rate": 1.612385639473492e-06, + "loss": 0.1853, + "num_input_tokens_seen": 6737664, + "step": 13685 + }, + { + "epoch": 1.806783687475254, + "grad_norm": 0.15307167172431946, + "learning_rate": 1.6120213653639407e-06, + "loss": 0.0448, + "num_input_tokens_seen": 6740224, + "step": 13690 + }, + { + "epoch": 1.8074435792530026, + "grad_norm": 5.365473747253418, + "learning_rate": 1.6116569613572861e-06, + "loss": 0.0295, + "num_input_tokens_seen": 6742848, + "step": 13695 + }, + { + "epoch": 1.8081034710307509, + "grad_norm": 0.040078677237033844, + "learning_rate": 1.611292427530871e-06, + "loss": 0.1216, + "num_input_tokens_seen": 6745472, + "step": 13700 + }, + { + "epoch": 1.8087633628084994, + "grad_norm": 11.159900665283203, + "learning_rate": 1.6109277639620648e-06, + "loss": 0.0853, + "num_input_tokens_seen": 6747904, + "step": 13705 + }, + { + "epoch": 1.8094232545862479, + "grad_norm": 0.08733681589365005, + "learning_rate": 1.6105629707282645e-06, + "loss": 0.1297, + "num_input_tokens_seen": 6750592, + "step": 13710 + }, + { + "epoch": 1.8100831463639961, + "grad_norm": 0.5698712468147278, + "learning_rate": 1.6101980479068954e-06, + "loss": 0.0997, + "num_input_tokens_seen": 6753024, + "step": 13715 + }, + { + "epoch": 1.8107430381417449, + "grad_norm": 14.043471336364746, + "learning_rate": 1.609832995575409e-06, + "loss": 0.0846, + "num_input_tokens_seen": 6755392, + "step": 13720 + }, + { + "epoch": 1.8114029299194931, + "grad_norm": 0.5709534287452698, + "learning_rate": 1.6094678138112854e-06, + "loss": 0.0217, + "num_input_tokens_seen": 6757696, + "step": 13725 + }, + { + "epoch": 1.8120628216972416, + "grad_norm": 0.6098952293395996, + "learning_rate": 1.6091025026920316e-06, + "loss": 0.0975, + "num_input_tokens_seen": 6760128, + "step": 13730 + }, + { + "epoch": 1.8127227134749901, + "grad_norm": 0.5241377353668213, + "learning_rate": 1.6087370622951824e-06, + "loss": 0.122, + "num_input_tokens_seen": 6762496, + "step": 13735 + }, + { + "epoch": 1.8133826052527384, + "grad_norm": 0.034056950360536575, + "learning_rate": 1.6083714926983004e-06, + "loss": 0.0623, + "num_input_tokens_seen": 6765120, + "step": 13740 + }, + { + "epoch": 1.8140424970304871, + "grad_norm": 0.13510484993457794, + "learning_rate": 1.608005793978974e-06, + "loss": 0.0799, + "num_input_tokens_seen": 6767808, + "step": 13745 + }, + { + "epoch": 1.8147023888082354, + "grad_norm": 29.59540557861328, + "learning_rate": 1.6076399662148207e-06, + "loss": 0.1322, + "num_input_tokens_seen": 6770240, + "step": 13750 + }, + { + "epoch": 1.815362280585984, + "grad_norm": 0.1105113998055458, + "learning_rate": 1.6072740094834848e-06, + "loss": 0.0825, + "num_input_tokens_seen": 6772608, + "step": 13755 + }, + { + "epoch": 1.8160221723637324, + "grad_norm": 46.684200286865234, + "learning_rate": 1.606907923862638e-06, + "loss": 0.1762, + "num_input_tokens_seen": 6774976, + "step": 13760 + }, + { + "epoch": 1.8166820641414807, + "grad_norm": 0.05612744390964508, + "learning_rate": 1.6065417094299793e-06, + "loss": 0.0425, + "num_input_tokens_seen": 6777408, + "step": 13765 + }, + { + "epoch": 1.8173419559192292, + "grad_norm": 0.03116508573293686, + "learning_rate": 1.6061753662632352e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6779584, + "step": 13770 + }, + { + "epoch": 1.8180018476969777, + "grad_norm": 0.39724618196487427, + "learning_rate": 1.6058088944401586e-06, + "loss": 0.0566, + "num_input_tokens_seen": 6782016, + "step": 13775 + }, + { + "epoch": 1.818661739474726, + "grad_norm": 0.18111243844032288, + "learning_rate": 1.6054422940385315e-06, + "loss": 0.07, + "num_input_tokens_seen": 6784576, + "step": 13780 + }, + { + "epoch": 1.8193216312524747, + "grad_norm": 0.08874604851007462, + "learning_rate": 1.6050755651361617e-06, + "loss": 0.0423, + "num_input_tokens_seen": 6787072, + "step": 13785 + }, + { + "epoch": 1.819981523030223, + "grad_norm": 0.08461711555719376, + "learning_rate": 1.6047087078108848e-06, + "loss": 0.1433, + "num_input_tokens_seen": 6789376, + "step": 13790 + }, + { + "epoch": 1.8206414148079715, + "grad_norm": 0.2662123441696167, + "learning_rate": 1.6043417221405636e-06, + "loss": 0.0027, + "num_input_tokens_seen": 6792000, + "step": 13795 + }, + { + "epoch": 1.82130130658572, + "grad_norm": 15.94675064086914, + "learning_rate": 1.6039746082030878e-06, + "loss": 0.0624, + "num_input_tokens_seen": 6794368, + "step": 13800 + }, + { + "epoch": 1.8219611983634683, + "grad_norm": 0.13098464906215668, + "learning_rate": 1.6036073660763755e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6796800, + "step": 13805 + }, + { + "epoch": 1.822621090141217, + "grad_norm": 51.529579162597656, + "learning_rate": 1.6032399958383706e-06, + "loss": 0.0646, + "num_input_tokens_seen": 6799232, + "step": 13810 + }, + { + "epoch": 1.8232809819189653, + "grad_norm": 13.87507152557373, + "learning_rate": 1.6028724975670454e-06, + "loss": 0.1131, + "num_input_tokens_seen": 6801792, + "step": 13815 + }, + { + "epoch": 1.8239408736967138, + "grad_norm": 0.11702122539281845, + "learning_rate": 1.6025048713403977e-06, + "loss": 0.1451, + "num_input_tokens_seen": 6804416, + "step": 13820 + }, + { + "epoch": 1.8246007654744623, + "grad_norm": 12.326350212097168, + "learning_rate": 1.6021371172364543e-06, + "loss": 0.0421, + "num_input_tokens_seen": 6806912, + "step": 13825 + }, + { + "epoch": 1.8252606572522105, + "grad_norm": 0.25664016604423523, + "learning_rate": 1.6017692353332676e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6809280, + "step": 13830 + }, + { + "epoch": 1.825920549029959, + "grad_norm": 0.08013727515935898, + "learning_rate": 1.6014012257089186e-06, + "loss": 0.0015, + "num_input_tokens_seen": 6811776, + "step": 13835 + }, + { + "epoch": 1.8265804408077075, + "grad_norm": 0.02246575430035591, + "learning_rate": 1.6010330884415146e-06, + "loss": 0.0004, + "num_input_tokens_seen": 6814080, + "step": 13840 + }, + { + "epoch": 1.827240332585456, + "grad_norm": 0.10979142040014267, + "learning_rate": 1.6006648236091903e-06, + "loss": 0.088, + "num_input_tokens_seen": 6816512, + "step": 13845 + }, + { + "epoch": 1.8279002243632045, + "grad_norm": 0.033827897161245346, + "learning_rate": 1.600296431290106e-06, + "loss": 0.0002, + "num_input_tokens_seen": 6818752, + "step": 13850 + }, + { + "epoch": 1.8285601161409528, + "grad_norm": 0.05608005449175835, + "learning_rate": 1.5999279115624517e-06, + "loss": 0.1294, + "num_input_tokens_seen": 6821248, + "step": 13855 + }, + { + "epoch": 1.8292200079187013, + "grad_norm": 0.1876734495162964, + "learning_rate": 1.5995592645044424e-06, + "loss": 0.1328, + "num_input_tokens_seen": 6823808, + "step": 13860 + }, + { + "epoch": 1.8298798996964498, + "grad_norm": 0.1528148502111435, + "learning_rate": 1.599190490194321e-06, + "loss": 0.1448, + "num_input_tokens_seen": 6826432, + "step": 13865 + }, + { + "epoch": 1.830539791474198, + "grad_norm": 0.06932291388511658, + "learning_rate": 1.5988215887103568e-06, + "loss": 0.0097, + "num_input_tokens_seen": 6828800, + "step": 13870 + }, + { + "epoch": 1.8311996832519468, + "grad_norm": 0.004690032918006182, + "learning_rate": 1.598452560130847e-06, + "loss": 0.1828, + "num_input_tokens_seen": 6831360, + "step": 13875 + }, + { + "epoch": 1.831859575029695, + "grad_norm": 28.09312629699707, + "learning_rate": 1.598083404534115e-06, + "loss": 0.1433, + "num_input_tokens_seen": 6833664, + "step": 13880 + }, + { + "epoch": 1.8325194668074436, + "grad_norm": 0.13816773891448975, + "learning_rate": 1.597714121998511e-06, + "loss": 0.065, + "num_input_tokens_seen": 6835904, + "step": 13885 + }, + { + "epoch": 1.833179358585192, + "grad_norm": 0.2143886536359787, + "learning_rate": 1.5973447126024131e-06, + "loss": 0.1494, + "num_input_tokens_seen": 6838336, + "step": 13890 + }, + { + "epoch": 1.8338392503629404, + "grad_norm": 0.11392121762037277, + "learning_rate": 1.596975176424226e-06, + "loss": 0.2852, + "num_input_tokens_seen": 6841024, + "step": 13895 + }, + { + "epoch": 1.8344991421406889, + "grad_norm": 0.045107193291187286, + "learning_rate": 1.5966055135423798e-06, + "loss": 0.1151, + "num_input_tokens_seen": 6843200, + "step": 13900 + }, + { + "epoch": 1.8351590339184374, + "grad_norm": 0.13529819250106812, + "learning_rate": 1.5962357240353342e-06, + "loss": 0.004, + "num_input_tokens_seen": 6845568, + "step": 13905 + }, + { + "epoch": 1.8358189256961859, + "grad_norm": 0.3247222900390625, + "learning_rate": 1.5958658079815737e-06, + "loss": 0.0585, + "num_input_tokens_seen": 6848000, + "step": 13910 + }, + { + "epoch": 1.8364788174739344, + "grad_norm": 20.190378189086914, + "learning_rate": 1.5954957654596102e-06, + "loss": 0.0423, + "num_input_tokens_seen": 6850688, + "step": 13915 + }, + { + "epoch": 1.8371387092516827, + "grad_norm": 0.09919846802949905, + "learning_rate": 1.595125596547983e-06, + "loss": 0.0121, + "num_input_tokens_seen": 6852928, + "step": 13920 + }, + { + "epoch": 1.8377986010294312, + "grad_norm": 0.05398841202259064, + "learning_rate": 1.5947553013252572e-06, + "loss": 0.0006, + "num_input_tokens_seen": 6855424, + "step": 13925 + }, + { + "epoch": 1.8384584928071797, + "grad_norm": 0.525188684463501, + "learning_rate": 1.594384879870026e-06, + "loss": 0.0626, + "num_input_tokens_seen": 6858048, + "step": 13930 + }, + { + "epoch": 1.839118384584928, + "grad_norm": 65.14164733886719, + "learning_rate": 1.594014332260908e-06, + "loss": 0.0881, + "num_input_tokens_seen": 6860672, + "step": 13935 + }, + { + "epoch": 1.8397782763626767, + "grad_norm": 45.95109939575195, + "learning_rate": 1.5936436585765493e-06, + "loss": 0.0854, + "num_input_tokens_seen": 6863104, + "step": 13940 + }, + { + "epoch": 1.840438168140425, + "grad_norm": 0.05436606705188751, + "learning_rate": 1.5932728588956233e-06, + "loss": 0.1164, + "num_input_tokens_seen": 6865472, + "step": 13945 + }, + { + "epoch": 1.8410980599181734, + "grad_norm": 0.17238880693912506, + "learning_rate": 1.5929019332968285e-06, + "loss": 0.0017, + "num_input_tokens_seen": 6867840, + "step": 13950 + }, + { + "epoch": 1.841757951695922, + "grad_norm": 0.043068185448646545, + "learning_rate": 1.5925308818588926e-06, + "loss": 0.0018, + "num_input_tokens_seen": 6870208, + "step": 13955 + }, + { + "epoch": 1.8424178434736702, + "grad_norm": 0.03494265675544739, + "learning_rate": 1.5921597046605672e-06, + "loss": 0.055, + "num_input_tokens_seen": 6872768, + "step": 13960 + }, + { + "epoch": 1.8430777352514187, + "grad_norm": 0.020545775070786476, + "learning_rate": 1.5917884017806327e-06, + "loss": 0.046, + "num_input_tokens_seen": 6875328, + "step": 13965 + }, + { + "epoch": 1.8437376270291672, + "grad_norm": 0.12847785651683807, + "learning_rate": 1.5914169732978957e-06, + "loss": 0.1396, + "num_input_tokens_seen": 6877632, + "step": 13970 + }, + { + "epoch": 1.8443975188069157, + "grad_norm": 0.058383919298648834, + "learning_rate": 1.5910454192911883e-06, + "loss": 0.1319, + "num_input_tokens_seen": 6879872, + "step": 13975 + }, + { + "epoch": 1.8450574105846642, + "grad_norm": 0.02930704690515995, + "learning_rate": 1.590673739839371e-06, + "loss": 0.092, + "num_input_tokens_seen": 6882688, + "step": 13980 + }, + { + "epoch": 1.8457173023624125, + "grad_norm": 0.02760966680943966, + "learning_rate": 1.5903019350213293e-06, + "loss": 0.0631, + "num_input_tokens_seen": 6885248, + "step": 13985 + }, + { + "epoch": 1.846377194140161, + "grad_norm": 0.5269352197647095, + "learning_rate": 1.589930004915977e-06, + "loss": 0.0017, + "num_input_tokens_seen": 6887552, + "step": 13990 + }, + { + "epoch": 1.8470370859179095, + "grad_norm": 0.006367943715304136, + "learning_rate": 1.5895579496022532e-06, + "loss": 0.0009, + "num_input_tokens_seen": 6889984, + "step": 13995 + }, + { + "epoch": 1.8476969776956578, + "grad_norm": 10.508081436157227, + "learning_rate": 1.5891857691591235e-06, + "loss": 0.3151, + "num_input_tokens_seen": 6892480, + "step": 14000 + }, + { + "epoch": 1.8483568694734065, + "grad_norm": 0.14122456312179565, + "learning_rate": 1.588813463665581e-06, + "loss": 0.0002, + "num_input_tokens_seen": 6894912, + "step": 14005 + }, + { + "epoch": 1.8490167612511548, + "grad_norm": 0.08993390202522278, + "learning_rate": 1.5884410332006443e-06, + "loss": 0.1177, + "num_input_tokens_seen": 6897408, + "step": 14010 + }, + { + "epoch": 1.8496766530289033, + "grad_norm": 0.21386578679084778, + "learning_rate": 1.58806847784336e-06, + "loss": 0.0403, + "num_input_tokens_seen": 6899712, + "step": 14015 + }, + { + "epoch": 1.8503365448066518, + "grad_norm": 81.02230834960938, + "learning_rate": 1.5876957976727993e-06, + "loss": 0.1096, + "num_input_tokens_seen": 6902016, + "step": 14020 + }, + { + "epoch": 1.8509964365844, + "grad_norm": 17.306926727294922, + "learning_rate": 1.5873229927680617e-06, + "loss": 0.1389, + "num_input_tokens_seen": 6904384, + "step": 14025 + }, + { + "epoch": 1.8516563283621488, + "grad_norm": 0.20489084720611572, + "learning_rate": 1.5869500632082717e-06, + "loss": 0.0433, + "num_input_tokens_seen": 6906816, + "step": 14030 + }, + { + "epoch": 1.852316220139897, + "grad_norm": 0.8502187132835388, + "learning_rate": 1.586577009072581e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6909376, + "step": 14035 + }, + { + "epoch": 1.8529761119176456, + "grad_norm": 0.016757525503635406, + "learning_rate": 1.5862038304401682e-06, + "loss": 0.0512, + "num_input_tokens_seen": 6911680, + "step": 14040 + }, + { + "epoch": 1.853636003695394, + "grad_norm": 0.11623700708150864, + "learning_rate": 1.585830527390237e-06, + "loss": 0.0724, + "num_input_tokens_seen": 6913920, + "step": 14045 + }, + { + "epoch": 1.8542958954731423, + "grad_norm": 58.12702560424805, + "learning_rate": 1.585457100002019e-06, + "loss": 0.189, + "num_input_tokens_seen": 6916416, + "step": 14050 + }, + { + "epoch": 1.8549557872508908, + "grad_norm": 0.018467986956238747, + "learning_rate": 1.5850835483547705e-06, + "loss": 0.0629, + "num_input_tokens_seen": 6918848, + "step": 14055 + }, + { + "epoch": 1.8556156790286393, + "grad_norm": 66.30414581298828, + "learning_rate": 1.5847098725277763e-06, + "loss": 0.0666, + "num_input_tokens_seen": 6921536, + "step": 14060 + }, + { + "epoch": 1.8562755708063876, + "grad_norm": 0.01666739024221897, + "learning_rate": 1.5843360726003454e-06, + "loss": 0.0795, + "num_input_tokens_seen": 6924096, + "step": 14065 + }, + { + "epoch": 1.8569354625841363, + "grad_norm": 0.11734547466039658, + "learning_rate": 1.5839621486518144e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6926400, + "step": 14070 + }, + { + "epoch": 1.8575953543618846, + "grad_norm": 21.82460594177246, + "learning_rate": 1.5835881007615466e-06, + "loss": 0.2018, + "num_input_tokens_seen": 6928896, + "step": 14075 + }, + { + "epoch": 1.858255246139633, + "grad_norm": 0.7360246777534485, + "learning_rate": 1.5832139290089302e-06, + "loss": 0.184, + "num_input_tokens_seen": 6931392, + "step": 14080 + }, + { + "epoch": 1.8589151379173816, + "grad_norm": 2.7356605529785156, + "learning_rate": 1.5828396334733807e-06, + "loss": 0.0733, + "num_input_tokens_seen": 6934080, + "step": 14085 + }, + { + "epoch": 1.8595750296951299, + "grad_norm": 50.23606491088867, + "learning_rate": 1.5824652142343394e-06, + "loss": 0.1321, + "num_input_tokens_seen": 6936576, + "step": 14090 + }, + { + "epoch": 1.8602349214728786, + "grad_norm": 21.126819610595703, + "learning_rate": 1.582090671371274e-06, + "loss": 0.1625, + "num_input_tokens_seen": 6938880, + "step": 14095 + }, + { + "epoch": 1.860894813250627, + "grad_norm": 12.098738670349121, + "learning_rate": 1.5817160049636792e-06, + "loss": 0.1393, + "num_input_tokens_seen": 6941504, + "step": 14100 + }, + { + "epoch": 1.8615547050283754, + "grad_norm": 67.69693756103516, + "learning_rate": 1.5813412150910748e-06, + "loss": 0.0703, + "num_input_tokens_seen": 6944000, + "step": 14105 + }, + { + "epoch": 1.862214596806124, + "grad_norm": 0.11709312349557877, + "learning_rate": 1.580966301833007e-06, + "loss": 0.1415, + "num_input_tokens_seen": 6946496, + "step": 14110 + }, + { + "epoch": 1.8628744885838722, + "grad_norm": 0.06242687627673149, + "learning_rate": 1.580591265269049e-06, + "loss": 0.0015, + "num_input_tokens_seen": 6948864, + "step": 14115 + }, + { + "epoch": 1.8635343803616207, + "grad_norm": 0.1491411030292511, + "learning_rate": 1.580216105478799e-06, + "loss": 0.0012, + "num_input_tokens_seen": 6951360, + "step": 14120 + }, + { + "epoch": 1.8641942721393692, + "grad_norm": 0.5130274295806885, + "learning_rate": 1.5798408225418825e-06, + "loss": 0.0014, + "num_input_tokens_seen": 6953856, + "step": 14125 + }, + { + "epoch": 1.8648541639171174, + "grad_norm": 0.14763778448104858, + "learning_rate": 1.57946541653795e-06, + "loss": 0.0719, + "num_input_tokens_seen": 6956096, + "step": 14130 + }, + { + "epoch": 1.8655140556948662, + "grad_norm": 0.16149531304836273, + "learning_rate": 1.579089887546679e-06, + "loss": 0.0571, + "num_input_tokens_seen": 6958528, + "step": 14135 + }, + { + "epoch": 1.8661739474726144, + "grad_norm": 0.08772195875644684, + "learning_rate": 1.578714235647773e-06, + "loss": 0.1446, + "num_input_tokens_seen": 6960704, + "step": 14140 + }, + { + "epoch": 1.866833839250363, + "grad_norm": 2.0068814754486084, + "learning_rate": 1.5783384609209609e-06, + "loss": 0.1826, + "num_input_tokens_seen": 6963072, + "step": 14145 + }, + { + "epoch": 1.8674937310281114, + "grad_norm": 0.1501556634902954, + "learning_rate": 1.577962563445999e-06, + "loss": 0.0619, + "num_input_tokens_seen": 6965568, + "step": 14150 + }, + { + "epoch": 1.8681536228058597, + "grad_norm": 0.060296908020973206, + "learning_rate": 1.5775865433026679e-06, + "loss": 0.0009, + "num_input_tokens_seen": 6967936, + "step": 14155 + }, + { + "epoch": 1.8688135145836084, + "grad_norm": 0.20123106241226196, + "learning_rate": 1.5772104005707756e-06, + "loss": 0.0008, + "num_input_tokens_seen": 6970432, + "step": 14160 + }, + { + "epoch": 1.8694734063613567, + "grad_norm": 0.2809836268424988, + "learning_rate": 1.5768341353301554e-06, + "loss": 0.1136, + "num_input_tokens_seen": 6973120, + "step": 14165 + }, + { + "epoch": 1.8701332981391052, + "grad_norm": 0.12703171372413635, + "learning_rate": 1.5764577476606673e-06, + "loss": 0.0006, + "num_input_tokens_seen": 6975296, + "step": 14170 + }, + { + "epoch": 1.8707931899168537, + "grad_norm": 29.460742950439453, + "learning_rate": 1.5760812376421965e-06, + "loss": 0.0891, + "num_input_tokens_seen": 6977728, + "step": 14175 + }, + { + "epoch": 1.871453081694602, + "grad_norm": 0.19353777170181274, + "learning_rate": 1.5757046053546547e-06, + "loss": 0.1602, + "num_input_tokens_seen": 6980160, + "step": 14180 + }, + { + "epoch": 1.8721129734723505, + "grad_norm": 2.112881660461426, + "learning_rate": 1.5753278508779797e-06, + "loss": 0.081, + "num_input_tokens_seen": 6982720, + "step": 14185 + }, + { + "epoch": 1.872772865250099, + "grad_norm": 0.19052846729755402, + "learning_rate": 1.574950974292134e-06, + "loss": 0.2397, + "num_input_tokens_seen": 6985344, + "step": 14190 + }, + { + "epoch": 1.8734327570278473, + "grad_norm": 0.056706760078668594, + "learning_rate": 1.5745739756771078e-06, + "loss": 0.0029, + "num_input_tokens_seen": 6987776, + "step": 14195 + }, + { + "epoch": 1.874092648805596, + "grad_norm": 0.618067741394043, + "learning_rate": 1.574196855112916e-06, + "loss": 0.0016, + "num_input_tokens_seen": 6990144, + "step": 14200 + }, + { + "epoch": 1.8747525405833443, + "grad_norm": 0.07428532093763351, + "learning_rate": 1.5738196126795998e-06, + "loss": 0.0731, + "num_input_tokens_seen": 6992832, + "step": 14205 + }, + { + "epoch": 1.8754124323610928, + "grad_norm": 0.030017530545592308, + "learning_rate": 1.5734422484572258e-06, + "loss": 0.0601, + "num_input_tokens_seen": 6995264, + "step": 14210 + }, + { + "epoch": 1.8760723241388413, + "grad_norm": 0.0610010139644146, + "learning_rate": 1.573064762525887e-06, + "loss": 0.0583, + "num_input_tokens_seen": 6997504, + "step": 14215 + }, + { + "epoch": 1.8767322159165896, + "grad_norm": 0.06411426514387131, + "learning_rate": 1.5726871549657027e-06, + "loss": 0.0743, + "num_input_tokens_seen": 6999872, + "step": 14220 + }, + { + "epoch": 1.8773921076943383, + "grad_norm": 0.03602256253361702, + "learning_rate": 1.5723094258568161e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7002432, + "step": 14225 + }, + { + "epoch": 1.8780519994720866, + "grad_norm": 13.705681800842285, + "learning_rate": 1.571931575279399e-06, + "loss": 0.087, + "num_input_tokens_seen": 7004544, + "step": 14230 + }, + { + "epoch": 1.878711891249835, + "grad_norm": 0.06149383634328842, + "learning_rate": 1.5715536033136462e-06, + "loss": 0.0305, + "num_input_tokens_seen": 7006912, + "step": 14235 + }, + { + "epoch": 1.8793717830275836, + "grad_norm": 33.77265930175781, + "learning_rate": 1.5711755100397798e-06, + "loss": 0.1398, + "num_input_tokens_seen": 7009408, + "step": 14240 + }, + { + "epoch": 1.8800316748053318, + "grad_norm": 0.04353427141904831, + "learning_rate": 1.570797295538048e-06, + "loss": 0.088, + "num_input_tokens_seen": 7011904, + "step": 14245 + }, + { + "epoch": 1.8806915665830803, + "grad_norm": 0.11581678688526154, + "learning_rate": 1.5704189598887232e-06, + "loss": 0.0514, + "num_input_tokens_seen": 7014080, + "step": 14250 + }, + { + "epoch": 1.8813514583608288, + "grad_norm": 0.18477141857147217, + "learning_rate": 1.570040503172105e-06, + "loss": 0.0358, + "num_input_tokens_seen": 7016384, + "step": 14255 + }, + { + "epoch": 1.8820113501385771, + "grad_norm": 18.699533462524414, + "learning_rate": 1.569661925468518e-06, + "loss": 0.246, + "num_input_tokens_seen": 7018560, + "step": 14260 + }, + { + "epoch": 1.8826712419163258, + "grad_norm": 1.352349877357483, + "learning_rate": 1.5692832268583126e-06, + "loss": 0.0017, + "num_input_tokens_seen": 7020928, + "step": 14265 + }, + { + "epoch": 1.8833311336940741, + "grad_norm": 1.0423433780670166, + "learning_rate": 1.5689044074218643e-06, + "loss": 0.2468, + "num_input_tokens_seen": 7023744, + "step": 14270 + }, + { + "epoch": 1.8839910254718226, + "grad_norm": 0.1379714459180832, + "learning_rate": 1.5685254672395753e-06, + "loss": 0.0838, + "num_input_tokens_seen": 7026240, + "step": 14275 + }, + { + "epoch": 1.8846509172495711, + "grad_norm": 0.3078247606754303, + "learning_rate": 1.568146406391873e-06, + "loss": 0.0016, + "num_input_tokens_seen": 7028544, + "step": 14280 + }, + { + "epoch": 1.8853108090273194, + "grad_norm": 0.07992412149906158, + "learning_rate": 1.5677672249592101e-06, + "loss": 0.0511, + "num_input_tokens_seen": 7031040, + "step": 14285 + }, + { + "epoch": 1.8859707008050681, + "grad_norm": 26.23770523071289, + "learning_rate": 1.567387923022065e-06, + "loss": 0.1894, + "num_input_tokens_seen": 7033216, + "step": 14290 + }, + { + "epoch": 1.8866305925828164, + "grad_norm": 0.21263808012008667, + "learning_rate": 1.567008500660942e-06, + "loss": 0.0037, + "num_input_tokens_seen": 7035712, + "step": 14295 + }, + { + "epoch": 1.887290484360565, + "grad_norm": 0.27672702074050903, + "learning_rate": 1.5666289579563708e-06, + "loss": 0.0039, + "num_input_tokens_seen": 7038464, + "step": 14300 + }, + { + "epoch": 1.8879503761383134, + "grad_norm": 15.613324165344238, + "learning_rate": 1.5662492949889065e-06, + "loss": 0.142, + "num_input_tokens_seen": 7040832, + "step": 14305 + }, + { + "epoch": 1.8886102679160617, + "grad_norm": 0.06228020787239075, + "learning_rate": 1.5658695118391299e-06, + "loss": 0.002, + "num_input_tokens_seen": 7043456, + "step": 14310 + }, + { + "epoch": 1.8892701596938102, + "grad_norm": 0.09050249308347702, + "learning_rate": 1.5654896085876468e-06, + "loss": 0.0934, + "num_input_tokens_seen": 7046016, + "step": 14315 + }, + { + "epoch": 1.8899300514715587, + "grad_norm": 1.3224503993988037, + "learning_rate": 1.5651095853150893e-06, + "loss": 0.0622, + "num_input_tokens_seen": 7048704, + "step": 14320 + }, + { + "epoch": 1.890589943249307, + "grad_norm": 74.3990478515625, + "learning_rate": 1.5647294421021144e-06, + "loss": 0.1235, + "num_input_tokens_seen": 7050944, + "step": 14325 + }, + { + "epoch": 1.8912498350270557, + "grad_norm": 0.6878701448440552, + "learning_rate": 1.5643491790294054e-06, + "loss": 0.0469, + "num_input_tokens_seen": 7053312, + "step": 14330 + }, + { + "epoch": 1.891909726804804, + "grad_norm": 0.16525298357009888, + "learning_rate": 1.5639687961776695e-06, + "loss": 0.0867, + "num_input_tokens_seen": 7055936, + "step": 14335 + }, + { + "epoch": 1.8925696185825525, + "grad_norm": 0.5637364983558655, + "learning_rate": 1.5635882936276403e-06, + "loss": 0.0129, + "num_input_tokens_seen": 7058688, + "step": 14340 + }, + { + "epoch": 1.893229510360301, + "grad_norm": 0.1050250306725502, + "learning_rate": 1.5632076714600773e-06, + "loss": 0.0382, + "num_input_tokens_seen": 7061312, + "step": 14345 + }, + { + "epoch": 1.8938894021380492, + "grad_norm": 0.06865517795085907, + "learning_rate": 1.5628269297557644e-06, + "loss": 0.0011, + "num_input_tokens_seen": 7063808, + "step": 14350 + }, + { + "epoch": 1.894549293915798, + "grad_norm": 0.3285222053527832, + "learning_rate": 1.5624460685955115e-06, + "loss": 0.1335, + "num_input_tokens_seen": 7066368, + "step": 14355 + }, + { + "epoch": 1.8952091856935462, + "grad_norm": 1.9790964126586914, + "learning_rate": 1.562065088060153e-06, + "loss": 0.1012, + "num_input_tokens_seen": 7068992, + "step": 14360 + }, + { + "epoch": 1.8958690774712947, + "grad_norm": 0.08310531079769135, + "learning_rate": 1.5616839882305498e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7071296, + "step": 14365 + }, + { + "epoch": 1.8965289692490432, + "grad_norm": 0.013942413963377476, + "learning_rate": 1.5613027691875877e-06, + "loss": 0.0008, + "num_input_tokens_seen": 7073536, + "step": 14370 + }, + { + "epoch": 1.8971888610267915, + "grad_norm": 0.014673193916678429, + "learning_rate": 1.5609214310121775e-06, + "loss": 0.1195, + "num_input_tokens_seen": 7076224, + "step": 14375 + }, + { + "epoch": 1.89784875280454, + "grad_norm": 0.06911315023899078, + "learning_rate": 1.5605399737852554e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7078464, + "step": 14380 + }, + { + "epoch": 1.8985086445822885, + "grad_norm": 0.017595898360013962, + "learning_rate": 1.560158397587783e-06, + "loss": 0.0817, + "num_input_tokens_seen": 7081216, + "step": 14385 + }, + { + "epoch": 1.8991685363600368, + "grad_norm": 0.0426434762775898, + "learning_rate": 1.559776702500747e-06, + "loss": 0.1318, + "num_input_tokens_seen": 7083648, + "step": 14390 + }, + { + "epoch": 1.8998284281377855, + "grad_norm": 0.020100057125091553, + "learning_rate": 1.5593948886051592e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7085952, + "step": 14395 + }, + { + "epoch": 1.9004883199155338, + "grad_norm": 0.6564512848854065, + "learning_rate": 1.5590129559820575e-06, + "loss": 0.1321, + "num_input_tokens_seen": 7088640, + "step": 14400 + }, + { + "epoch": 1.9011482116932823, + "grad_norm": 0.35198983550071716, + "learning_rate": 1.5586309047125039e-06, + "loss": 0.0028, + "num_input_tokens_seen": 7091456, + "step": 14405 + }, + { + "epoch": 1.9018081034710308, + "grad_norm": 0.14709027111530304, + "learning_rate": 1.5582487348775862e-06, + "loss": 0.0625, + "num_input_tokens_seen": 7093952, + "step": 14410 + }, + { + "epoch": 1.902467995248779, + "grad_norm": 0.6149783730506897, + "learning_rate": 1.5578664465584168e-06, + "loss": 0.0348, + "num_input_tokens_seen": 7096256, + "step": 14415 + }, + { + "epoch": 1.9031278870265278, + "grad_norm": 0.035323966294527054, + "learning_rate": 1.5574840398361339e-06, + "loss": 0.1055, + "num_input_tokens_seen": 7098816, + "step": 14420 + }, + { + "epoch": 1.903787778804276, + "grad_norm": 0.07863358408212662, + "learning_rate": 1.5571015147919005e-06, + "loss": 0.0709, + "num_input_tokens_seen": 7101056, + "step": 14425 + }, + { + "epoch": 1.9044476705820246, + "grad_norm": 0.08664238452911377, + "learning_rate": 1.5567188715069048e-06, + "loss": 0.1621, + "num_input_tokens_seen": 7103488, + "step": 14430 + }, + { + "epoch": 1.905107562359773, + "grad_norm": 0.02262006513774395, + "learning_rate": 1.5563361100623604e-06, + "loss": 0.0179, + "num_input_tokens_seen": 7105792, + "step": 14435 + }, + { + "epoch": 1.9057674541375214, + "grad_norm": 0.024045400321483612, + "learning_rate": 1.555953230539505e-06, + "loss": 0.0417, + "num_input_tokens_seen": 7107968, + "step": 14440 + }, + { + "epoch": 1.9064273459152699, + "grad_norm": 0.13778825104236603, + "learning_rate": 1.5555702330196021e-06, + "loss": 0.0521, + "num_input_tokens_seen": 7110592, + "step": 14445 + }, + { + "epoch": 1.9070872376930184, + "grad_norm": 63.8028564453125, + "learning_rate": 1.5551871175839406e-06, + "loss": 0.2691, + "num_input_tokens_seen": 7112704, + "step": 14450 + }, + { + "epoch": 1.9077471294707666, + "grad_norm": 0.0490015372633934, + "learning_rate": 1.5548038843138338e-06, + "loss": 0.0585, + "num_input_tokens_seen": 7115200, + "step": 14455 + }, + { + "epoch": 1.9084070212485154, + "grad_norm": 0.03240058943629265, + "learning_rate": 1.5544205332906201e-06, + "loss": 0.1164, + "num_input_tokens_seen": 7117632, + "step": 14460 + }, + { + "epoch": 1.9090669130262636, + "grad_norm": 0.39769986271858215, + "learning_rate": 1.554037064595663e-06, + "loss": 0.221, + "num_input_tokens_seen": 7120064, + "step": 14465 + }, + { + "epoch": 1.9097268048040121, + "grad_norm": 0.09407158941030502, + "learning_rate": 1.553653478310351e-06, + "loss": 0.1352, + "num_input_tokens_seen": 7122624, + "step": 14470 + }, + { + "epoch": 1.9103866965817606, + "grad_norm": 0.3653717041015625, + "learning_rate": 1.5532697745160972e-06, + "loss": 0.0808, + "num_input_tokens_seen": 7124992, + "step": 14475 + }, + { + "epoch": 1.911046588359509, + "grad_norm": 0.887218713760376, + "learning_rate": 1.5528859532943405e-06, + "loss": 0.0018, + "num_input_tokens_seen": 7127424, + "step": 14480 + }, + { + "epoch": 1.9117064801372576, + "grad_norm": 0.211788609623909, + "learning_rate": 1.552502014726544e-06, + "loss": 0.0209, + "num_input_tokens_seen": 7129920, + "step": 14485 + }, + { + "epoch": 1.912366371915006, + "grad_norm": 0.03963814303278923, + "learning_rate": 1.5521179588941956e-06, + "loss": 0.0735, + "num_input_tokens_seen": 7132352, + "step": 14490 + }, + { + "epoch": 1.9130262636927544, + "grad_norm": 0.1944819688796997, + "learning_rate": 1.5517337858788087e-06, + "loss": 0.0481, + "num_input_tokens_seen": 7134848, + "step": 14495 + }, + { + "epoch": 1.913686155470503, + "grad_norm": 0.17637357115745544, + "learning_rate": 1.551349495761921e-06, + "loss": 0.0019, + "num_input_tokens_seen": 7137344, + "step": 14500 + }, + { + "epoch": 1.9143460472482512, + "grad_norm": 0.063203364610672, + "learning_rate": 1.550965088625095e-06, + "loss": 0.1998, + "num_input_tokens_seen": 7139968, + "step": 14505 + }, + { + "epoch": 1.9150059390259997, + "grad_norm": 0.14308017492294312, + "learning_rate": 1.5505805645499192e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7142464, + "step": 14510 + }, + { + "epoch": 1.9156658308037482, + "grad_norm": 0.38713952898979187, + "learning_rate": 1.5501959236180053e-06, + "loss": 0.0447, + "num_input_tokens_seen": 7145088, + "step": 14515 + }, + { + "epoch": 1.9163257225814965, + "grad_norm": 0.03757447376847267, + "learning_rate": 1.5498111659109908e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7147712, + "step": 14520 + }, + { + "epoch": 1.9169856143592452, + "grad_norm": 14.420360565185547, + "learning_rate": 1.549426291510538e-06, + "loss": 0.1215, + "num_input_tokens_seen": 7150144, + "step": 14525 + }, + { + "epoch": 1.9176455061369935, + "grad_norm": 15.488569259643555, + "learning_rate": 1.5490413004983334e-06, + "loss": 0.215, + "num_input_tokens_seen": 7152640, + "step": 14530 + }, + { + "epoch": 1.918305397914742, + "grad_norm": 0.0733942836523056, + "learning_rate": 1.5486561929560887e-06, + "loss": 0.1271, + "num_input_tokens_seen": 7155200, + "step": 14535 + }, + { + "epoch": 1.9189652896924905, + "grad_norm": 0.07937513291835785, + "learning_rate": 1.5482709689655398e-06, + "loss": 0.0219, + "num_input_tokens_seen": 7157376, + "step": 14540 + }, + { + "epoch": 1.9196251814702388, + "grad_norm": 0.6346595287322998, + "learning_rate": 1.5478856286084483e-06, + "loss": 0.0957, + "num_input_tokens_seen": 7160000, + "step": 14545 + }, + { + "epoch": 1.9202850732479875, + "grad_norm": 0.18721289932727814, + "learning_rate": 1.5475001719665994e-06, + "loss": 0.1187, + "num_input_tokens_seen": 7162432, + "step": 14550 + }, + { + "epoch": 1.9209449650257358, + "grad_norm": 0.06002054363489151, + "learning_rate": 1.5471145991218037e-06, + "loss": 0.256, + "num_input_tokens_seen": 7164736, + "step": 14555 + }, + { + "epoch": 1.9216048568034843, + "grad_norm": 0.2037287801504135, + "learning_rate": 1.5467289101558962e-06, + "loss": 0.0013, + "num_input_tokens_seen": 7167104, + "step": 14560 + }, + { + "epoch": 1.9222647485812328, + "grad_norm": 0.06793426722288132, + "learning_rate": 1.5463431051507368e-06, + "loss": 0.0016, + "num_input_tokens_seen": 7169920, + "step": 14565 + }, + { + "epoch": 1.922924640358981, + "grad_norm": 0.43444526195526123, + "learning_rate": 1.5459571841882095e-06, + "loss": 0.0322, + "num_input_tokens_seen": 7172224, + "step": 14570 + }, + { + "epoch": 1.9235845321367295, + "grad_norm": 0.02774728089570999, + "learning_rate": 1.5455711473502233e-06, + "loss": 0.0529, + "num_input_tokens_seen": 7174848, + "step": 14575 + }, + { + "epoch": 1.924244423914478, + "grad_norm": 0.1229424998164177, + "learning_rate": 1.5451849947187121e-06, + "loss": 0.1527, + "num_input_tokens_seen": 7177536, + "step": 14580 + }, + { + "epoch": 1.9249043156922263, + "grad_norm": 17.570878982543945, + "learning_rate": 1.5447987263756335e-06, + "loss": 0.1494, + "num_input_tokens_seen": 7179712, + "step": 14585 + }, + { + "epoch": 1.925564207469975, + "grad_norm": 5.2115302085876465, + "learning_rate": 1.5444123424029703e-06, + "loss": 0.0758, + "num_input_tokens_seen": 7182016, + "step": 14590 + }, + { + "epoch": 1.9262240992477233, + "grad_norm": 0.03287997841835022, + "learning_rate": 1.5440258428827298e-06, + "loss": 0.0535, + "num_input_tokens_seen": 7184640, + "step": 14595 + }, + { + "epoch": 1.9268839910254718, + "grad_norm": 0.06192353367805481, + "learning_rate": 1.5436392278969438e-06, + "loss": 0.0964, + "num_input_tokens_seen": 7187072, + "step": 14600 + }, + { + "epoch": 1.9275438828032203, + "grad_norm": 0.10755826532840729, + "learning_rate": 1.5432524975276681e-06, + "loss": 0.0184, + "num_input_tokens_seen": 7189312, + "step": 14605 + }, + { + "epoch": 1.9282037745809686, + "grad_norm": 0.039796650409698486, + "learning_rate": 1.5428656518569838e-06, + "loss": 0.0008, + "num_input_tokens_seen": 7191680, + "step": 14610 + }, + { + "epoch": 1.9288636663587173, + "grad_norm": 0.018832655623555183, + "learning_rate": 1.5424786909669962e-06, + "loss": 0.2284, + "num_input_tokens_seen": 7194048, + "step": 14615 + }, + { + "epoch": 1.9295235581364656, + "grad_norm": 0.12818144261837006, + "learning_rate": 1.5420916149398346e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7196800, + "step": 14620 + }, + { + "epoch": 1.930183449914214, + "grad_norm": 0.10920975357294083, + "learning_rate": 1.5417044238576533e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7199232, + "step": 14625 + }, + { + "epoch": 1.9308433416919626, + "grad_norm": 106.05785369873047, + "learning_rate": 1.5413171178026308e-06, + "loss": 0.1908, + "num_input_tokens_seen": 7201792, + "step": 14630 + }, + { + "epoch": 1.9315032334697109, + "grad_norm": 0.03340896964073181, + "learning_rate": 1.5409296968569698e-06, + "loss": 0.073, + "num_input_tokens_seen": 7204352, + "step": 14635 + }, + { + "epoch": 1.9321631252474594, + "grad_norm": 0.07122701406478882, + "learning_rate": 1.540542161102898e-06, + "loss": 0.0874, + "num_input_tokens_seen": 7206912, + "step": 14640 + }, + { + "epoch": 1.9328230170252079, + "grad_norm": 0.20408612489700317, + "learning_rate": 1.5401545106226665e-06, + "loss": 0.0365, + "num_input_tokens_seen": 7209152, + "step": 14645 + }, + { + "epoch": 1.9334829088029564, + "grad_norm": 17.965118408203125, + "learning_rate": 1.539766745498552e-06, + "loss": 0.1238, + "num_input_tokens_seen": 7211712, + "step": 14650 + }, + { + "epoch": 1.9341428005807049, + "grad_norm": 89.8632583618164, + "learning_rate": 1.5393788658128542e-06, + "loss": 0.0643, + "num_input_tokens_seen": 7214144, + "step": 14655 + }, + { + "epoch": 1.9348026923584531, + "grad_norm": 0.12914709746837616, + "learning_rate": 1.538990871647898e-06, + "loss": 0.0293, + "num_input_tokens_seen": 7216768, + "step": 14660 + }, + { + "epoch": 1.9354625841362016, + "grad_norm": 0.4041579067707062, + "learning_rate": 1.5386027630860324e-06, + "loss": 0.05, + "num_input_tokens_seen": 7219200, + "step": 14665 + }, + { + "epoch": 1.9361224759139501, + "grad_norm": 0.2001969814300537, + "learning_rate": 1.5382145402096307e-06, + "loss": 0.05, + "num_input_tokens_seen": 7221760, + "step": 14670 + }, + { + "epoch": 1.9367823676916984, + "grad_norm": 0.018148990347981453, + "learning_rate": 1.53782620310109e-06, + "loss": 0.0418, + "num_input_tokens_seen": 7224192, + "step": 14675 + }, + { + "epoch": 1.9374422594694471, + "grad_norm": 0.01664876751601696, + "learning_rate": 1.5374377518428324e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7226560, + "step": 14680 + }, + { + "epoch": 1.9381021512471954, + "grad_norm": 0.11727305501699448, + "learning_rate": 1.5370491865173042e-06, + "loss": 0.0937, + "num_input_tokens_seen": 7229056, + "step": 14685 + }, + { + "epoch": 1.938762043024944, + "grad_norm": 0.06682264059782028, + "learning_rate": 1.5366605072069747e-06, + "loss": 0.0438, + "num_input_tokens_seen": 7231872, + "step": 14690 + }, + { + "epoch": 1.9394219348026924, + "grad_norm": 0.08827029913663864, + "learning_rate": 1.5362717139943392e-06, + "loss": 0.0753, + "num_input_tokens_seen": 7234240, + "step": 14695 + }, + { + "epoch": 1.9400818265804407, + "grad_norm": 0.015324097126722336, + "learning_rate": 1.5358828069619155e-06, + "loss": 0.2984, + "num_input_tokens_seen": 7236800, + "step": 14700 + }, + { + "epoch": 1.9407417183581892, + "grad_norm": 0.029757771641016006, + "learning_rate": 1.5354937861922463e-06, + "loss": 0.0489, + "num_input_tokens_seen": 7239424, + "step": 14705 + }, + { + "epoch": 1.9414016101359377, + "grad_norm": 14.723073959350586, + "learning_rate": 1.5351046517678989e-06, + "loss": 0.2381, + "num_input_tokens_seen": 7241920, + "step": 14710 + }, + { + "epoch": 1.9420615019136862, + "grad_norm": 0.08645767718553543, + "learning_rate": 1.534715403771464e-06, + "loss": 0.1093, + "num_input_tokens_seen": 7244160, + "step": 14715 + }, + { + "epoch": 1.9427213936914347, + "grad_norm": 0.06573551148176193, + "learning_rate": 1.5343260422855573e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7246592, + "step": 14720 + }, + { + "epoch": 1.943381285469183, + "grad_norm": 2.026156187057495, + "learning_rate": 1.5339365673928168e-06, + "loss": 0.0014, + "num_input_tokens_seen": 7249280, + "step": 14725 + }, + { + "epoch": 1.9440411772469315, + "grad_norm": 0.019643815234303474, + "learning_rate": 1.5335469791759068e-06, + "loss": 0.0014, + "num_input_tokens_seen": 7251712, + "step": 14730 + }, + { + "epoch": 1.94470106902468, + "grad_norm": 0.04115848243236542, + "learning_rate": 1.5331572777175137e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7254144, + "step": 14735 + }, + { + "epoch": 1.9453609608024283, + "grad_norm": 0.032892487943172455, + "learning_rate": 1.5327674631003493e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7256640, + "step": 14740 + }, + { + "epoch": 1.946020852580177, + "grad_norm": 29.96086311340332, + "learning_rate": 1.5323775354071491e-06, + "loss": 0.2108, + "num_input_tokens_seen": 7259264, + "step": 14745 + }, + { + "epoch": 1.9466807443579253, + "grad_norm": 9.626848220825195, + "learning_rate": 1.531987494720672e-06, + "loss": 0.0767, + "num_input_tokens_seen": 7261760, + "step": 14750 + }, + { + "epoch": 1.9473406361356738, + "grad_norm": 0.31695500016212463, + "learning_rate": 1.5315973411237016e-06, + "loss": 0.1239, + "num_input_tokens_seen": 7264128, + "step": 14755 + }, + { + "epoch": 1.9480005279134223, + "grad_norm": 0.07309925556182861, + "learning_rate": 1.531207074699045e-06, + "loss": 0.0343, + "num_input_tokens_seen": 7266432, + "step": 14760 + }, + { + "epoch": 1.9486604196911705, + "grad_norm": 0.44029781222343445, + "learning_rate": 1.5308166955295334e-06, + "loss": 0.2684, + "num_input_tokens_seen": 7268800, + "step": 14765 + }, + { + "epoch": 1.949320311468919, + "grad_norm": 0.13368448615074158, + "learning_rate": 1.5304262036980221e-06, + "loss": 0.0569, + "num_input_tokens_seen": 7271488, + "step": 14770 + }, + { + "epoch": 1.9499802032466675, + "grad_norm": 0.3928241431713104, + "learning_rate": 1.5300355992873903e-06, + "loss": 0.0474, + "num_input_tokens_seen": 7273984, + "step": 14775 + }, + { + "epoch": 1.950640095024416, + "grad_norm": 13.45598316192627, + "learning_rate": 1.5296448823805407e-06, + "loss": 0.0888, + "num_input_tokens_seen": 7276800, + "step": 14780 + }, + { + "epoch": 1.9512999868021645, + "grad_norm": 0.303527295589447, + "learning_rate": 1.5292540530603998e-06, + "loss": 0.001, + "num_input_tokens_seen": 7279616, + "step": 14785 + }, + { + "epoch": 1.9519598785799128, + "grad_norm": 0.24898892641067505, + "learning_rate": 1.5288631114099193e-06, + "loss": 0.0512, + "num_input_tokens_seen": 7281856, + "step": 14790 + }, + { + "epoch": 1.9526197703576613, + "grad_norm": 0.27193912863731384, + "learning_rate": 1.528472057512073e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7284544, + "step": 14795 + }, + { + "epoch": 1.9532796621354098, + "grad_norm": 0.5412809252738953, + "learning_rate": 1.5280808914498593e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7287040, + "step": 14800 + }, + { + "epoch": 1.953939553913158, + "grad_norm": 0.12621335685253143, + "learning_rate": 1.5276896133063e-06, + "loss": 0.0468, + "num_input_tokens_seen": 7289856, + "step": 14805 + }, + { + "epoch": 1.9545994456909068, + "grad_norm": 0.014875116758048534, + "learning_rate": 1.5272982231644421e-06, + "loss": 0.1041, + "num_input_tokens_seen": 7292096, + "step": 14810 + }, + { + "epoch": 1.955259337468655, + "grad_norm": 164.4103240966797, + "learning_rate": 1.5269067211073545e-06, + "loss": 0.1564, + "num_input_tokens_seen": 7294720, + "step": 14815 + }, + { + "epoch": 1.9559192292464036, + "grad_norm": 0.0532931424677372, + "learning_rate": 1.5265151072181309e-06, + "loss": 0.2679, + "num_input_tokens_seen": 7297344, + "step": 14820 + }, + { + "epoch": 1.956579121024152, + "grad_norm": 0.0477750189602375, + "learning_rate": 1.5261233815798886e-06, + "loss": 0.1056, + "num_input_tokens_seen": 7299776, + "step": 14825 + }, + { + "epoch": 1.9572390128019004, + "grad_norm": 0.3876939117908478, + "learning_rate": 1.5257315442757682e-06, + "loss": 0.0649, + "num_input_tokens_seen": 7302272, + "step": 14830 + }, + { + "epoch": 1.957898904579649, + "grad_norm": 0.052762262523174286, + "learning_rate": 1.5253395953889349e-06, + "loss": 0.0355, + "num_input_tokens_seen": 7304576, + "step": 14835 + }, + { + "epoch": 1.9585587963573974, + "grad_norm": 1.0515028238296509, + "learning_rate": 1.5249475350025764e-06, + "loss": 0.0764, + "num_input_tokens_seen": 7307456, + "step": 14840 + }, + { + "epoch": 1.9592186881351459, + "grad_norm": 0.6155139803886414, + "learning_rate": 1.5245553631999054e-06, + "loss": 0.0557, + "num_input_tokens_seen": 7309824, + "step": 14845 + }, + { + "epoch": 1.9598785799128944, + "grad_norm": 0.06223485246300697, + "learning_rate": 1.5241630800641567e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7312192, + "step": 14850 + }, + { + "epoch": 1.9605384716906427, + "grad_norm": 0.1414095163345337, + "learning_rate": 1.5237706856785898e-06, + "loss": 0.0836, + "num_input_tokens_seen": 7314432, + "step": 14855 + }, + { + "epoch": 1.9611983634683912, + "grad_norm": 0.06543510407209396, + "learning_rate": 1.523378180126488e-06, + "loss": 0.0521, + "num_input_tokens_seen": 7316928, + "step": 14860 + }, + { + "epoch": 1.9618582552461397, + "grad_norm": 0.09722074866294861, + "learning_rate": 1.5229855634911575e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7319616, + "step": 14865 + }, + { + "epoch": 1.962518147023888, + "grad_norm": 0.11939533799886703, + "learning_rate": 1.5225928358559283e-06, + "loss": 0.002, + "num_input_tokens_seen": 7321856, + "step": 14870 + }, + { + "epoch": 1.9631780388016367, + "grad_norm": 0.03377383202314377, + "learning_rate": 1.522199997304154e-06, + "loss": 0.083, + "num_input_tokens_seen": 7324224, + "step": 14875 + }, + { + "epoch": 1.963837930579385, + "grad_norm": 7.487933158874512, + "learning_rate": 1.5218070479192118e-06, + "loss": 0.047, + "num_input_tokens_seen": 7326656, + "step": 14880 + }, + { + "epoch": 1.9644978223571334, + "grad_norm": 53.403724670410156, + "learning_rate": 1.521413987784502e-06, + "loss": 0.3865, + "num_input_tokens_seen": 7329280, + "step": 14885 + }, + { + "epoch": 1.965157714134882, + "grad_norm": 0.09187313914299011, + "learning_rate": 1.5210208169834496e-06, + "loss": 0.1041, + "num_input_tokens_seen": 7331648, + "step": 14890 + }, + { + "epoch": 1.9658176059126302, + "grad_norm": 0.21420453488826752, + "learning_rate": 1.5206275355995013e-06, + "loss": 0.0013, + "num_input_tokens_seen": 7334144, + "step": 14895 + }, + { + "epoch": 1.966477497690379, + "grad_norm": 0.47146058082580566, + "learning_rate": 1.5202341437161288e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7336640, + "step": 14900 + }, + { + "epoch": 1.9671373894681272, + "grad_norm": 0.05956108495593071, + "learning_rate": 1.5198406414168266e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7339072, + "step": 14905 + }, + { + "epoch": 1.9677972812458757, + "grad_norm": 0.03283373638987541, + "learning_rate": 1.5194470287851124e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7341504, + "step": 14910 + }, + { + "epoch": 1.9684571730236242, + "grad_norm": 0.0706038549542427, + "learning_rate": 1.5190533059045284e-06, + "loss": 0.149, + "num_input_tokens_seen": 7343936, + "step": 14915 + }, + { + "epoch": 1.9691170648013725, + "grad_norm": 0.029689691960811615, + "learning_rate": 1.5186594728586383e-06, + "loss": 0.1659, + "num_input_tokens_seen": 7346368, + "step": 14920 + }, + { + "epoch": 1.969776956579121, + "grad_norm": 3.8806729316711426, + "learning_rate": 1.518265529731031e-06, + "loss": 0.0452, + "num_input_tokens_seen": 7348800, + "step": 14925 + }, + { + "epoch": 1.9704368483568695, + "grad_norm": 0.1849229484796524, + "learning_rate": 1.5178714766053185e-06, + "loss": 0.1882, + "num_input_tokens_seen": 7350848, + "step": 14930 + }, + { + "epoch": 1.9710967401346178, + "grad_norm": 41.28623580932617, + "learning_rate": 1.5174773135651347e-06, + "loss": 0.1838, + "num_input_tokens_seen": 7353344, + "step": 14935 + }, + { + "epoch": 1.9717566319123665, + "grad_norm": 0.12453329563140869, + "learning_rate": 1.5170830406941386e-06, + "loss": 0.0747, + "num_input_tokens_seen": 7355776, + "step": 14940 + }, + { + "epoch": 1.9724165236901148, + "grad_norm": 0.02459733560681343, + "learning_rate": 1.5166886580760114e-06, + "loss": 0.003, + "num_input_tokens_seen": 7358720, + "step": 14945 + }, + { + "epoch": 1.9730764154678633, + "grad_norm": 0.03575903922319412, + "learning_rate": 1.5162941657944584e-06, + "loss": 0.0494, + "num_input_tokens_seen": 7361216, + "step": 14950 + }, + { + "epoch": 1.9737363072456118, + "grad_norm": 10.720014572143555, + "learning_rate": 1.5158995639332073e-06, + "loss": 0.0556, + "num_input_tokens_seen": 7363904, + "step": 14955 + }, + { + "epoch": 1.97439619902336, + "grad_norm": 0.049669649451971054, + "learning_rate": 1.5155048525760095e-06, + "loss": 0.1777, + "num_input_tokens_seen": 7366400, + "step": 14960 + }, + { + "epoch": 1.9750560908011088, + "grad_norm": 11.927507400512695, + "learning_rate": 1.5151100318066396e-06, + "loss": 0.0443, + "num_input_tokens_seen": 7368896, + "step": 14965 + }, + { + "epoch": 1.975715982578857, + "grad_norm": 14.424137115478516, + "learning_rate": 1.5147151017088958e-06, + "loss": 0.1217, + "num_input_tokens_seen": 7371392, + "step": 14970 + }, + { + "epoch": 1.9763758743566056, + "grad_norm": 0.06754660606384277, + "learning_rate": 1.514320062366599e-06, + "loss": 0.0033, + "num_input_tokens_seen": 7373888, + "step": 14975 + }, + { + "epoch": 1.977035766134354, + "grad_norm": 0.05533408746123314, + "learning_rate": 1.513924913863593e-06, + "loss": 0.0015, + "num_input_tokens_seen": 7376448, + "step": 14980 + }, + { + "epoch": 1.9776956579121023, + "grad_norm": 69.94572448730469, + "learning_rate": 1.513529656283746e-06, + "loss": 0.1575, + "num_input_tokens_seen": 7378624, + "step": 14985 + }, + { + "epoch": 1.9783555496898508, + "grad_norm": 0.45651572942733765, + "learning_rate": 1.513134289710948e-06, + "loss": 0.0303, + "num_input_tokens_seen": 7381184, + "step": 14990 + }, + { + "epoch": 1.9790154414675993, + "grad_norm": 0.38808608055114746, + "learning_rate": 1.5127388142291126e-06, + "loss": 0.0846, + "num_input_tokens_seen": 7383488, + "step": 14995 + }, + { + "epoch": 1.9796753332453476, + "grad_norm": 0.08070173114538193, + "learning_rate": 1.5123432299221772e-06, + "loss": 0.0485, + "num_input_tokens_seen": 7385792, + "step": 15000 + }, + { + "epoch": 1.9803352250230963, + "grad_norm": 0.26309722661972046, + "learning_rate": 1.5119475368741013e-06, + "loss": 0.0013, + "num_input_tokens_seen": 7388416, + "step": 15005 + }, + { + "epoch": 1.9809951168008446, + "grad_norm": 48.13701629638672, + "learning_rate": 1.5115517351688679e-06, + "loss": 0.0646, + "num_input_tokens_seen": 7390720, + "step": 15010 + }, + { + "epoch": 1.9816550085785931, + "grad_norm": 0.3674798905849457, + "learning_rate": 1.5111558248904829e-06, + "loss": 0.0839, + "num_input_tokens_seen": 7393152, + "step": 15015 + }, + { + "epoch": 1.9823149003563416, + "grad_norm": 24.177644729614258, + "learning_rate": 1.5107598061229755e-06, + "loss": 0.1332, + "num_input_tokens_seen": 7395328, + "step": 15020 + }, + { + "epoch": 1.98297479213409, + "grad_norm": 10.63642406463623, + "learning_rate": 1.510363678950398e-06, + "loss": 0.1803, + "num_input_tokens_seen": 7397696, + "step": 15025 + }, + { + "epoch": 1.9836346839118386, + "grad_norm": 0.4671352505683899, + "learning_rate": 1.509967443456826e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7399936, + "step": 15030 + }, + { + "epoch": 1.984294575689587, + "grad_norm": 0.18537390232086182, + "learning_rate": 1.5095710997263562e-06, + "loss": 0.0032, + "num_input_tokens_seen": 7402048, + "step": 15035 + }, + { + "epoch": 1.9849544674673354, + "grad_norm": 21.04948616027832, + "learning_rate": 1.509174647843111e-06, + "loss": 0.0739, + "num_input_tokens_seen": 7404288, + "step": 15040 + }, + { + "epoch": 1.985614359245084, + "grad_norm": 0.08610837906599045, + "learning_rate": 1.5087780878912335e-06, + "loss": 0.0578, + "num_input_tokens_seen": 7406528, + "step": 15045 + }, + { + "epoch": 1.9862742510228322, + "grad_norm": 0.06923052668571472, + "learning_rate": 1.5083814199548912e-06, + "loss": 0.0584, + "num_input_tokens_seen": 7408832, + "step": 15050 + }, + { + "epoch": 1.9869341428005807, + "grad_norm": 0.07516025751829147, + "learning_rate": 1.5079846441182744e-06, + "loss": 0.144, + "num_input_tokens_seen": 7411584, + "step": 15055 + }, + { + "epoch": 1.9875940345783292, + "grad_norm": 0.07054906338453293, + "learning_rate": 1.5075877604655948e-06, + "loss": 0.0525, + "num_input_tokens_seen": 7414080, + "step": 15060 + }, + { + "epoch": 1.9882539263560775, + "grad_norm": 0.025323480367660522, + "learning_rate": 1.5071907690810892e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7416384, + "step": 15065 + }, + { + "epoch": 1.9889138181338262, + "grad_norm": 0.31888553500175476, + "learning_rate": 1.5067936700490153e-06, + "loss": 0.1545, + "num_input_tokens_seen": 7418752, + "step": 15070 + }, + { + "epoch": 1.9895737099115745, + "grad_norm": 0.3737231492996216, + "learning_rate": 1.5063964634536553e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7420992, + "step": 15075 + }, + { + "epoch": 1.990233601689323, + "grad_norm": 17.460901260375977, + "learning_rate": 1.5059991493793124e-06, + "loss": 0.2178, + "num_input_tokens_seen": 7423488, + "step": 15080 + }, + { + "epoch": 1.9908934934670715, + "grad_norm": 44.136966705322266, + "learning_rate": 1.5056017279103146e-06, + "loss": 0.095, + "num_input_tokens_seen": 7425920, + "step": 15085 + }, + { + "epoch": 1.9915533852448197, + "grad_norm": 0.05283686891198158, + "learning_rate": 1.505204199131011e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7428608, + "step": 15090 + }, + { + "epoch": 1.9922132770225685, + "grad_norm": 0.030043108388781548, + "learning_rate": 1.5048065631257748e-06, + "loss": 0.045, + "num_input_tokens_seen": 7431168, + "step": 15095 + }, + { + "epoch": 1.9928731688003167, + "grad_norm": 0.2686857283115387, + "learning_rate": 1.5044088199790012e-06, + "loss": 0.0011, + "num_input_tokens_seen": 7433728, + "step": 15100 + }, + { + "epoch": 1.9935330605780652, + "grad_norm": 38.80851745605469, + "learning_rate": 1.5040109697751082e-06, + "loss": 0.1167, + "num_input_tokens_seen": 7436352, + "step": 15105 + }, + { + "epoch": 1.9941929523558137, + "grad_norm": 0.10195082426071167, + "learning_rate": 1.5036130125985364e-06, + "loss": 0.1322, + "num_input_tokens_seen": 7438784, + "step": 15110 + }, + { + "epoch": 1.994852844133562, + "grad_norm": 0.02739228866994381, + "learning_rate": 1.5032149485337494e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7441152, + "step": 15115 + }, + { + "epoch": 1.9955127359113105, + "grad_norm": 0.0074937110766768456, + "learning_rate": 1.5028167776652339e-06, + "loss": 0.1143, + "num_input_tokens_seen": 7443456, + "step": 15120 + }, + { + "epoch": 1.996172627689059, + "grad_norm": 0.4618469476699829, + "learning_rate": 1.5024185000774984e-06, + "loss": 0.0071, + "num_input_tokens_seen": 7446016, + "step": 15125 + }, + { + "epoch": 1.9968325194668073, + "grad_norm": 0.3459224998950958, + "learning_rate": 1.5020201158550745e-06, + "loss": 0.172, + "num_input_tokens_seen": 7448448, + "step": 15130 + }, + { + "epoch": 1.997492411244556, + "grad_norm": 0.6738158464431763, + "learning_rate": 1.5016216250825164e-06, + "loss": 0.1166, + "num_input_tokens_seen": 7451008, + "step": 15135 + }, + { + "epoch": 1.9981523030223043, + "grad_norm": 0.19503681361675262, + "learning_rate": 1.5012230278444005e-06, + "loss": 0.0687, + "num_input_tokens_seen": 7453504, + "step": 15140 + }, + { + "epoch": 1.9988121948000528, + "grad_norm": 0.1917676031589508, + "learning_rate": 1.5008243242253269e-06, + "loss": 0.0024, + "num_input_tokens_seen": 7456000, + "step": 15145 + }, + { + "epoch": 1.9994720865778013, + "grad_norm": 0.06810544431209564, + "learning_rate": 1.5004255143099167e-06, + "loss": 0.0027, + "num_input_tokens_seen": 7458496, + "step": 15150 + }, + { + "epoch": 2.0001319783555496, + "grad_norm": 0.10668352246284485, + "learning_rate": 1.5000265981828153e-06, + "loss": 0.0349, + "num_input_tokens_seen": 7460784, + "step": 15155 + }, + { + "epoch": 2.0007918701332983, + "grad_norm": 0.04501849785447121, + "learning_rate": 1.4996275759286894e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7463024, + "step": 15160 + }, + { + "epoch": 2.0007918701332983, + "eval_loss": 0.1182011216878891, + "eval_runtime": 7.9776, + "eval_samples_per_second": 844.239, + "eval_steps_per_second": 105.546, + "num_input_tokens_seen": 7463024, + "step": 15160 + }, + { + "epoch": 2.0014517619110466, + "grad_norm": 0.7536020278930664, + "learning_rate": 1.4992284476322283e-06, + "loss": 0.0012, + "num_input_tokens_seen": 7465456, + "step": 15165 + }, + { + "epoch": 2.002111653688795, + "grad_norm": 0.7476214170455933, + "learning_rate": 1.4988292133781445e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7467632, + "step": 15170 + }, + { + "epoch": 2.0027715454665436, + "grad_norm": 0.02698984183371067, + "learning_rate": 1.498429873251172e-06, + "loss": 0.0406, + "num_input_tokens_seen": 7470192, + "step": 15175 + }, + { + "epoch": 2.003431437244292, + "grad_norm": 27.516366958618164, + "learning_rate": 1.4980304273360686e-06, + "loss": 0.1068, + "num_input_tokens_seen": 7472432, + "step": 15180 + }, + { + "epoch": 2.0040913290220406, + "grad_norm": 0.051883816719055176, + "learning_rate": 1.4976308757176135e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7474928, + "step": 15185 + }, + { + "epoch": 2.004751220799789, + "grad_norm": 0.022894321009516716, + "learning_rate": 1.4972312184806084e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7477296, + "step": 15190 + }, + { + "epoch": 2.005411112577537, + "grad_norm": 43.75878143310547, + "learning_rate": 1.496831455709878e-06, + "loss": 0.2538, + "num_input_tokens_seen": 7479728, + "step": 15195 + }, + { + "epoch": 2.006071004355286, + "grad_norm": 0.021939776837825775, + "learning_rate": 1.4964315874902687e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7482096, + "step": 15200 + }, + { + "epoch": 2.006730896133034, + "grad_norm": 0.03236817568540573, + "learning_rate": 1.49603161390665e-06, + "loss": 0.0712, + "num_input_tokens_seen": 7484400, + "step": 15205 + }, + { + "epoch": 2.007390787910783, + "grad_norm": 0.0764591321349144, + "learning_rate": 1.495631535043913e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7486704, + "step": 15210 + }, + { + "epoch": 2.008050679688531, + "grad_norm": 0.0899280309677124, + "learning_rate": 1.4952313509869722e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7489200, + "step": 15215 + }, + { + "epoch": 2.0087105714662794, + "grad_norm": 0.0050004273653030396, + "learning_rate": 1.4948310618207628e-06, + "loss": 0.0876, + "num_input_tokens_seen": 7491504, + "step": 15220 + }, + { + "epoch": 2.009370463244028, + "grad_norm": 0.04471347853541374, + "learning_rate": 1.4944306676302442e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7493872, + "step": 15225 + }, + { + "epoch": 2.0100303550217764, + "grad_norm": 0.5096481442451477, + "learning_rate": 1.4940301685003967e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7496176, + "step": 15230 + }, + { + "epoch": 2.0106902467995247, + "grad_norm": 15.463436126708984, + "learning_rate": 1.4936295645162232e-06, + "loss": 0.0935, + "num_input_tokens_seen": 7498864, + "step": 15235 + }, + { + "epoch": 2.0113501385772734, + "grad_norm": 0.29590997099876404, + "learning_rate": 1.4932288557627497e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7501296, + "step": 15240 + }, + { + "epoch": 2.0120100303550217, + "grad_norm": 0.08997692912817001, + "learning_rate": 1.4928280423250228e-06, + "loss": 0.0768, + "num_input_tokens_seen": 7503856, + "step": 15245 + }, + { + "epoch": 2.0126699221327704, + "grad_norm": 0.03307807072997093, + "learning_rate": 1.4924271242881128e-06, + "loss": 0.0711, + "num_input_tokens_seen": 7506352, + "step": 15250 + }, + { + "epoch": 2.0133298139105187, + "grad_norm": 0.025954531505703926, + "learning_rate": 1.4920261017371116e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7508912, + "step": 15255 + }, + { + "epoch": 2.013989705688267, + "grad_norm": 0.005450637079775333, + "learning_rate": 1.4916249747571333e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7511280, + "step": 15260 + }, + { + "epoch": 2.0146495974660157, + "grad_norm": 23.70391082763672, + "learning_rate": 1.4912237434333142e-06, + "loss": 0.0789, + "num_input_tokens_seen": 7513904, + "step": 15265 + }, + { + "epoch": 2.015309489243764, + "grad_norm": 2.1816422939300537, + "learning_rate": 1.4908224078508125e-06, + "loss": 0.0778, + "num_input_tokens_seen": 7516720, + "step": 15270 + }, + { + "epoch": 2.0159693810215127, + "grad_norm": 18.413654327392578, + "learning_rate": 1.4904209680948092e-06, + "loss": 0.0883, + "num_input_tokens_seen": 7519152, + "step": 15275 + }, + { + "epoch": 2.016629272799261, + "grad_norm": 0.026565132662653923, + "learning_rate": 1.4900194242505067e-06, + "loss": 0.0014, + "num_input_tokens_seen": 7521648, + "step": 15280 + }, + { + "epoch": 2.0172891645770092, + "grad_norm": 0.35007259249687195, + "learning_rate": 1.48961777640313e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7524144, + "step": 15285 + }, + { + "epoch": 2.017949056354758, + "grad_norm": 0.0682162195444107, + "learning_rate": 1.4892160246379257e-06, + "loss": 0.0538, + "num_input_tokens_seen": 7526704, + "step": 15290 + }, + { + "epoch": 2.0186089481325062, + "grad_norm": 0.1767728328704834, + "learning_rate": 1.4888141690401628e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7529264, + "step": 15295 + }, + { + "epoch": 2.0192688399102545, + "grad_norm": 0.0036514129023998976, + "learning_rate": 1.488412209695132e-06, + "loss": 0.0493, + "num_input_tokens_seen": 7531696, + "step": 15300 + }, + { + "epoch": 2.0199287316880032, + "grad_norm": 0.406444251537323, + "learning_rate": 1.4880101466881464e-06, + "loss": 0.0108, + "num_input_tokens_seen": 7533872, + "step": 15305 + }, + { + "epoch": 2.0205886234657515, + "grad_norm": 0.08499430119991302, + "learning_rate": 1.4876079801045418e-06, + "loss": 0.0706, + "num_input_tokens_seen": 7535920, + "step": 15310 + }, + { + "epoch": 2.0212485152435002, + "grad_norm": 0.881211519241333, + "learning_rate": 1.4872057100296738e-06, + "loss": 0.0015, + "num_input_tokens_seen": 7538288, + "step": 15315 + }, + { + "epoch": 2.0219084070212485, + "grad_norm": 0.021755851805210114, + "learning_rate": 1.4868033365489222e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7540528, + "step": 15320 + }, + { + "epoch": 2.022568298798997, + "grad_norm": 0.00886019691824913, + "learning_rate": 1.4864008597476873e-06, + "loss": 0.0521, + "num_input_tokens_seen": 7543024, + "step": 15325 + }, + { + "epoch": 2.0232281905767455, + "grad_norm": 0.017470117658376694, + "learning_rate": 1.4859982797113923e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7545392, + "step": 15330 + }, + { + "epoch": 2.023888082354494, + "grad_norm": 0.0028445960488170385, + "learning_rate": 1.4855955965254816e-06, + "loss": 0.0533, + "num_input_tokens_seen": 7547632, + "step": 15335 + }, + { + "epoch": 2.0245479741322425, + "grad_norm": 0.0514727346599102, + "learning_rate": 1.485192810275422e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7550192, + "step": 15340 + }, + { + "epoch": 2.025207865909991, + "grad_norm": 0.002774176187813282, + "learning_rate": 1.4847899210467021e-06, + "loss": 0.096, + "num_input_tokens_seen": 7552688, + "step": 15345 + }, + { + "epoch": 2.025867757687739, + "grad_norm": 0.005656952038407326, + "learning_rate": 1.4843869289248318e-06, + "loss": 0.1187, + "num_input_tokens_seen": 7555312, + "step": 15350 + }, + { + "epoch": 2.026527649465488, + "grad_norm": 0.0056389025412499905, + "learning_rate": 1.483983833995344e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7558128, + "step": 15355 + }, + { + "epoch": 2.027187541243236, + "grad_norm": 0.0857715830206871, + "learning_rate": 1.4835806363437915e-06, + "loss": 0.0011, + "num_input_tokens_seen": 7560368, + "step": 15360 + }, + { + "epoch": 2.0278474330209844, + "grad_norm": 0.013230415992438793, + "learning_rate": 1.4831773360557513e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7562736, + "step": 15365 + }, + { + "epoch": 2.028507324798733, + "grad_norm": 0.6576191186904907, + "learning_rate": 1.4827739332168201e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7565424, + "step": 15370 + }, + { + "epoch": 2.0291672165764814, + "grad_norm": 0.29561471939086914, + "learning_rate": 1.4823704279126172e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7567856, + "step": 15375 + }, + { + "epoch": 2.02982710835423, + "grad_norm": 17.580387115478516, + "learning_rate": 1.4819668202287847e-06, + "loss": 0.0433, + "num_input_tokens_seen": 7570352, + "step": 15380 + }, + { + "epoch": 2.0304870001319784, + "grad_norm": 2.213040590286255, + "learning_rate": 1.4815631102509843e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7572720, + "step": 15385 + }, + { + "epoch": 2.0311468919097266, + "grad_norm": 0.0046477969735860825, + "learning_rate": 1.4811592980649014e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7574960, + "step": 15390 + }, + { + "epoch": 2.0318067836874754, + "grad_norm": 0.029864639043807983, + "learning_rate": 1.4807553837562415e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7577584, + "step": 15395 + }, + { + "epoch": 2.0324666754652236, + "grad_norm": 0.004414161667227745, + "learning_rate": 1.4803513674107325e-06, + "loss": 0.0, + "num_input_tokens_seen": 7580016, + "step": 15400 + }, + { + "epoch": 2.0331265672429724, + "grad_norm": 0.002472514286637306, + "learning_rate": 1.4799472491141245e-06, + "loss": 0.0846, + "num_input_tokens_seen": 7582320, + "step": 15405 + }, + { + "epoch": 2.0337864590207206, + "grad_norm": 23.93593978881836, + "learning_rate": 1.4795430289521885e-06, + "loss": 0.0432, + "num_input_tokens_seen": 7585008, + "step": 15410 + }, + { + "epoch": 2.034446350798469, + "grad_norm": 0.0009444769821129739, + "learning_rate": 1.479138707010717e-06, + "loss": 0.0021, + "num_input_tokens_seen": 7587760, + "step": 15415 + }, + { + "epoch": 2.0351062425762176, + "grad_norm": 0.605518639087677, + "learning_rate": 1.4787342833755245e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7590256, + "step": 15420 + }, + { + "epoch": 2.035766134353966, + "grad_norm": 0.004898503888398409, + "learning_rate": 1.4783297581324472e-06, + "loss": 0.0977, + "num_input_tokens_seen": 7592688, + "step": 15425 + }, + { + "epoch": 2.036426026131714, + "grad_norm": 0.0124160535633564, + "learning_rate": 1.4779251313673422e-06, + "loss": 0.0257, + "num_input_tokens_seen": 7595120, + "step": 15430 + }, + { + "epoch": 2.037085917909463, + "grad_norm": 0.2338225543498993, + "learning_rate": 1.4775204031660894e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7597680, + "step": 15435 + }, + { + "epoch": 2.037745809687211, + "grad_norm": 0.0083228200674057, + "learning_rate": 1.4771155736145888e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7600304, + "step": 15440 + }, + { + "epoch": 2.03840570146496, + "grad_norm": 0.0006205007084645331, + "learning_rate": 1.4767106427987625e-06, + "loss": 0.001, + "num_input_tokens_seen": 7602800, + "step": 15445 + }, + { + "epoch": 2.039065593242708, + "grad_norm": 0.013593539595603943, + "learning_rate": 1.4763056108045549e-06, + "loss": 0.0798, + "num_input_tokens_seen": 7605040, + "step": 15450 + }, + { + "epoch": 2.0397254850204565, + "grad_norm": 0.0008863316616043448, + "learning_rate": 1.4759004777179297e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7607408, + "step": 15455 + }, + { + "epoch": 2.040385376798205, + "grad_norm": 0.010385457426309586, + "learning_rate": 1.475495243624875e-06, + "loss": 0.0612, + "num_input_tokens_seen": 7609904, + "step": 15460 + }, + { + "epoch": 2.0410452685759535, + "grad_norm": 0.3944193124771118, + "learning_rate": 1.475089908611398e-06, + "loss": 0.0879, + "num_input_tokens_seen": 7612592, + "step": 15465 + }, + { + "epoch": 2.041705160353702, + "grad_norm": 0.006912830751389265, + "learning_rate": 1.4746844727635282e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7615152, + "step": 15470 + }, + { + "epoch": 2.0423650521314505, + "grad_norm": 15.692147254943848, + "learning_rate": 1.474278936167316e-06, + "loss": 0.1432, + "num_input_tokens_seen": 7617584, + "step": 15475 + }, + { + "epoch": 2.0430249439091988, + "grad_norm": 0.014512135647237301, + "learning_rate": 1.4738732989088347e-06, + "loss": 0.0, + "num_input_tokens_seen": 7620208, + "step": 15480 + }, + { + "epoch": 2.0436848356869475, + "grad_norm": 0.029907351359725, + "learning_rate": 1.4734675610741767e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7622896, + "step": 15485 + }, + { + "epoch": 2.0443447274646958, + "grad_norm": 0.0069085415452718735, + "learning_rate": 1.4730617227494577e-06, + "loss": 0.0506, + "num_input_tokens_seen": 7625328, + "step": 15490 + }, + { + "epoch": 2.045004619242444, + "grad_norm": 0.008318849839270115, + "learning_rate": 1.4726557840208137e-06, + "loss": 0.0549, + "num_input_tokens_seen": 7628208, + "step": 15495 + }, + { + "epoch": 2.0456645110201928, + "grad_norm": 0.05175945535302162, + "learning_rate": 1.4722497449744022e-06, + "loss": 0.0008, + "num_input_tokens_seen": 7630640, + "step": 15500 + }, + { + "epoch": 2.046324402797941, + "grad_norm": 0.2361360341310501, + "learning_rate": 1.471843605696402e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7633072, + "step": 15505 + }, + { + "epoch": 2.0469842945756898, + "grad_norm": 0.014276132918894291, + "learning_rate": 1.4714373662730136e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7635568, + "step": 15510 + }, + { + "epoch": 2.047644186353438, + "grad_norm": 0.016513869166374207, + "learning_rate": 1.4710310267904578e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7638320, + "step": 15515 + }, + { + "epoch": 2.0483040781311863, + "grad_norm": 0.007878591306507587, + "learning_rate": 1.4706245873349777e-06, + "loss": 0.0627, + "num_input_tokens_seen": 7640496, + "step": 15520 + }, + { + "epoch": 2.048963969908935, + "grad_norm": 0.029861019924283028, + "learning_rate": 1.4702180479928368e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7642736, + "step": 15525 + }, + { + "epoch": 2.0496238616866833, + "grad_norm": 0.049566447734832764, + "learning_rate": 1.4698114088503203e-06, + "loss": 0.0519, + "num_input_tokens_seen": 7645104, + "step": 15530 + }, + { + "epoch": 2.050283753464432, + "grad_norm": 0.00440286984667182, + "learning_rate": 1.4694046699937341e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7647472, + "step": 15535 + }, + { + "epoch": 2.0509436452421803, + "grad_norm": 3.612928628921509, + "learning_rate": 1.4689978315094066e-06, + "loss": 0.0015, + "num_input_tokens_seen": 7649968, + "step": 15540 + }, + { + "epoch": 2.0516035370199286, + "grad_norm": 10.197099685668945, + "learning_rate": 1.468590893483685e-06, + "loss": 0.0712, + "num_input_tokens_seen": 7652592, + "step": 15545 + }, + { + "epoch": 2.0522634287976773, + "grad_norm": 0.053775690495967865, + "learning_rate": 1.4681838560029395e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7655216, + "step": 15550 + }, + { + "epoch": 2.0529233205754256, + "grad_norm": 0.4073657691478729, + "learning_rate": 1.467776719153561e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7657840, + "step": 15555 + }, + { + "epoch": 2.053583212353174, + "grad_norm": 15.94839096069336, + "learning_rate": 1.4673694830219613e-06, + "loss": 0.0458, + "num_input_tokens_seen": 7660144, + "step": 15560 + }, + { + "epoch": 2.0542431041309226, + "grad_norm": 0.15345242619514465, + "learning_rate": 1.466962147694573e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7662704, + "step": 15565 + }, + { + "epoch": 2.054902995908671, + "grad_norm": 16.386497497558594, + "learning_rate": 1.4665547132578508e-06, + "loss": 0.093, + "num_input_tokens_seen": 7665200, + "step": 15570 + }, + { + "epoch": 2.0555628876864196, + "grad_norm": 0.06717591732740402, + "learning_rate": 1.466147179798269e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7667376, + "step": 15575 + }, + { + "epoch": 2.056222779464168, + "grad_norm": 0.16586610674858093, + "learning_rate": 1.4657395474023237e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7669680, + "step": 15580 + }, + { + "epoch": 2.056882671241916, + "grad_norm": 0.011295896023511887, + "learning_rate": 1.4653318161565325e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7672176, + "step": 15585 + }, + { + "epoch": 2.057542563019665, + "grad_norm": 0.001465719542466104, + "learning_rate": 1.4649239861474324e-06, + "loss": 0.0523, + "num_input_tokens_seen": 7674800, + "step": 15590 + }, + { + "epoch": 2.058202454797413, + "grad_norm": 0.019439391791820526, + "learning_rate": 1.4645160574615834e-06, + "loss": 0.1052, + "num_input_tokens_seen": 7677232, + "step": 15595 + }, + { + "epoch": 2.058862346575162, + "grad_norm": 0.21349266171455383, + "learning_rate": 1.4641080301855648e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7679536, + "step": 15600 + }, + { + "epoch": 2.05952223835291, + "grad_norm": 0.012627423740923405, + "learning_rate": 1.4636999044059777e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7681840, + "step": 15605 + }, + { + "epoch": 2.0601821301306584, + "grad_norm": 31.638559341430664, + "learning_rate": 1.4632916802094436e-06, + "loss": 0.1445, + "num_input_tokens_seen": 7684528, + "step": 15610 + }, + { + "epoch": 2.060842021908407, + "grad_norm": 0.046404916793107986, + "learning_rate": 1.462883357682605e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7687152, + "step": 15615 + }, + { + "epoch": 2.0615019136861554, + "grad_norm": 0.07641780376434326, + "learning_rate": 1.4624749369121265e-06, + "loss": 0.1174, + "num_input_tokens_seen": 7689712, + "step": 15620 + }, + { + "epoch": 2.0621618054639037, + "grad_norm": 0.0200422964990139, + "learning_rate": 1.4620664179846908e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7692144, + "step": 15625 + }, + { + "epoch": 2.0628216972416524, + "grad_norm": 0.03198925033211708, + "learning_rate": 1.4616578009870044e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7694320, + "step": 15630 + }, + { + "epoch": 2.0634815890194007, + "grad_norm": 6.981573104858398, + "learning_rate": 1.4612490860057927e-06, + "loss": 0.0008, + "num_input_tokens_seen": 7696752, + "step": 15635 + }, + { + "epoch": 2.0641414807971494, + "grad_norm": 0.24552814662456512, + "learning_rate": 1.4608402731278022e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7699184, + "step": 15640 + }, + { + "epoch": 2.0648013725748977, + "grad_norm": 0.013794873841106892, + "learning_rate": 1.4604313624398014e-06, + "loss": 0.1332, + "num_input_tokens_seen": 7701424, + "step": 15645 + }, + { + "epoch": 2.065461264352646, + "grad_norm": 0.0617479644715786, + "learning_rate": 1.4600223540285778e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7703856, + "step": 15650 + }, + { + "epoch": 2.0661211561303947, + "grad_norm": 0.01708795502781868, + "learning_rate": 1.459613247980941e-06, + "loss": 0.0737, + "num_input_tokens_seen": 7706224, + "step": 15655 + }, + { + "epoch": 2.066781047908143, + "grad_norm": 0.03478756919503212, + "learning_rate": 1.4592040443837203e-06, + "loss": 0.0006, + "num_input_tokens_seen": 7708400, + "step": 15660 + }, + { + "epoch": 2.0674409396858917, + "grad_norm": 0.016062593087553978, + "learning_rate": 1.458794743323767e-06, + "loss": 0.0015, + "num_input_tokens_seen": 7710960, + "step": 15665 + }, + { + "epoch": 2.06810083146364, + "grad_norm": 0.12953194975852966, + "learning_rate": 1.4583853448879513e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7713520, + "step": 15670 + }, + { + "epoch": 2.0687607232413883, + "grad_norm": 0.007021949626505375, + "learning_rate": 1.4579758491631655e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7715824, + "step": 15675 + }, + { + "epoch": 2.069420615019137, + "grad_norm": 0.01294905785471201, + "learning_rate": 1.4575662562363222e-06, + "loss": 0.0443, + "num_input_tokens_seen": 7718448, + "step": 15680 + }, + { + "epoch": 2.0700805067968853, + "grad_norm": 0.23416569828987122, + "learning_rate": 1.4571565661943542e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7720944, + "step": 15685 + }, + { + "epoch": 2.0707403985746335, + "grad_norm": 0.13015711307525635, + "learning_rate": 1.456746779124216e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7723760, + "step": 15690 + }, + { + "epoch": 2.0714002903523823, + "grad_norm": 0.0016105296090245247, + "learning_rate": 1.4563368951128812e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7726256, + "step": 15695 + }, + { + "epoch": 2.0720601821301305, + "grad_norm": 0.2471880316734314, + "learning_rate": 1.4559269142473452e-06, + "loss": 0.1174, + "num_input_tokens_seen": 7729136, + "step": 15700 + }, + { + "epoch": 2.0727200739078793, + "grad_norm": 0.050808388739824295, + "learning_rate": 1.455516836614623e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7731824, + "step": 15705 + }, + { + "epoch": 2.0733799656856275, + "grad_norm": 0.004038075916469097, + "learning_rate": 1.4551066623017505e-06, + "loss": 0.1329, + "num_input_tokens_seen": 7734128, + "step": 15710 + }, + { + "epoch": 2.074039857463376, + "grad_norm": 0.01952297054231167, + "learning_rate": 1.4546963913957848e-06, + "loss": 0.072, + "num_input_tokens_seen": 7736112, + "step": 15715 + }, + { + "epoch": 2.0746997492411245, + "grad_norm": 0.01668599434196949, + "learning_rate": 1.4542860239838025e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7738736, + "step": 15720 + }, + { + "epoch": 2.075359641018873, + "grad_norm": 0.05960208922624588, + "learning_rate": 1.4538755601529018e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7741424, + "step": 15725 + }, + { + "epoch": 2.0760195327966215, + "grad_norm": 0.05067654699087143, + "learning_rate": 1.4534649999901999e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7743664, + "step": 15730 + }, + { + "epoch": 2.07667942457437, + "grad_norm": 0.029946548864245415, + "learning_rate": 1.4530543435828355e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7746032, + "step": 15735 + }, + { + "epoch": 2.077339316352118, + "grad_norm": 0.03288107365369797, + "learning_rate": 1.4526435910179674e-06, + "loss": 0.0567, + "num_input_tokens_seen": 7748336, + "step": 15740 + }, + { + "epoch": 2.077999208129867, + "grad_norm": 0.006778739392757416, + "learning_rate": 1.4522327423827746e-06, + "loss": 0.1586, + "num_input_tokens_seen": 7750960, + "step": 15745 + }, + { + "epoch": 2.078659099907615, + "grad_norm": 0.030091719701886177, + "learning_rate": 1.4518217977644576e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7753776, + "step": 15750 + }, + { + "epoch": 2.079318991685364, + "grad_norm": 0.13405410945415497, + "learning_rate": 1.4514107572502355e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7756400, + "step": 15755 + }, + { + "epoch": 2.079978883463112, + "grad_norm": 15.214388847351074, + "learning_rate": 1.450999620927349e-06, + "loss": 0.0781, + "num_input_tokens_seen": 7758640, + "step": 15760 + }, + { + "epoch": 2.0806387752408604, + "grad_norm": 0.05948585271835327, + "learning_rate": 1.4505883888830591e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7761072, + "step": 15765 + }, + { + "epoch": 2.081298667018609, + "grad_norm": 0.054697006940841675, + "learning_rate": 1.4501770612046461e-06, + "loss": 0.001, + "num_input_tokens_seen": 7763696, + "step": 15770 + }, + { + "epoch": 2.0819585587963574, + "grad_norm": 0.06462718546390533, + "learning_rate": 1.4497656379794126e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7766192, + "step": 15775 + }, + { + "epoch": 2.0826184505741057, + "grad_norm": 0.007994703017175198, + "learning_rate": 1.4493541192946785e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7768752, + "step": 15780 + }, + { + "epoch": 2.0832783423518544, + "grad_norm": 0.03308379277586937, + "learning_rate": 1.448942505237787e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7771248, + "step": 15785 + }, + { + "epoch": 2.0839382341296027, + "grad_norm": 0.005272372625768185, + "learning_rate": 1.4485307958960996e-06, + "loss": 0.0, + "num_input_tokens_seen": 7774064, + "step": 15790 + }, + { + "epoch": 2.0845981259073514, + "grad_norm": 0.01093069277703762, + "learning_rate": 1.448118991356999e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7776496, + "step": 15795 + }, + { + "epoch": 2.0852580176850997, + "grad_norm": 0.01305568777024746, + "learning_rate": 1.4477070917078876e-06, + "loss": 0.1766, + "num_input_tokens_seen": 7778800, + "step": 15800 + }, + { + "epoch": 2.085917909462848, + "grad_norm": 0.0439567007124424, + "learning_rate": 1.4472950970361878e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7781168, + "step": 15805 + }, + { + "epoch": 2.0865778012405967, + "grad_norm": 0.179931640625, + "learning_rate": 1.4468830074293425e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7783792, + "step": 15810 + }, + { + "epoch": 2.087237693018345, + "grad_norm": 0.006239602342247963, + "learning_rate": 1.4464708229748154e-06, + "loss": 0.0104, + "num_input_tokens_seen": 7786288, + "step": 15815 + }, + { + "epoch": 2.087897584796093, + "grad_norm": 0.00521710142493248, + "learning_rate": 1.4460585437600887e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7788848, + "step": 15820 + }, + { + "epoch": 2.088557476573842, + "grad_norm": 0.02085597813129425, + "learning_rate": 1.4456461698726666e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7791472, + "step": 15825 + }, + { + "epoch": 2.08921736835159, + "grad_norm": 0.0025343787856400013, + "learning_rate": 1.445233701400072e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7794096, + "step": 15830 + }, + { + "epoch": 2.089877260129339, + "grad_norm": 0.0019459739560261369, + "learning_rate": 1.4448211384298482e-06, + "loss": 0.0923, + "num_input_tokens_seen": 7796464, + "step": 15835 + }, + { + "epoch": 2.090537151907087, + "grad_norm": 0.0659346804022789, + "learning_rate": 1.4444084810495589e-06, + "loss": 0.0005, + "num_input_tokens_seen": 7798832, + "step": 15840 + }, + { + "epoch": 2.0911970436848355, + "grad_norm": 22.533714294433594, + "learning_rate": 1.4439957293467877e-06, + "loss": 0.2503, + "num_input_tokens_seen": 7801456, + "step": 15845 + }, + { + "epoch": 2.091856935462584, + "grad_norm": 4.6341047286987305, + "learning_rate": 1.4435828834091384e-06, + "loss": 0.0013, + "num_input_tokens_seen": 7804016, + "step": 15850 + }, + { + "epoch": 2.0925168272403325, + "grad_norm": 0.018850073218345642, + "learning_rate": 1.443169943324234e-06, + "loss": 0.0461, + "num_input_tokens_seen": 7806384, + "step": 15855 + }, + { + "epoch": 2.0931767190180812, + "grad_norm": 0.011577551253139973, + "learning_rate": 1.4427569091797182e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7808496, + "step": 15860 + }, + { + "epoch": 2.0938366107958295, + "grad_norm": 0.012284616008400917, + "learning_rate": 1.442343781063255e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7810992, + "step": 15865 + }, + { + "epoch": 2.094496502573578, + "grad_norm": 0.03962375968694687, + "learning_rate": 1.441930559062527e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7813680, + "step": 15870 + }, + { + "epoch": 2.0951563943513265, + "grad_norm": 14.506804466247559, + "learning_rate": 1.4415172432652385e-06, + "loss": 0.0673, + "num_input_tokens_seen": 7816304, + "step": 15875 + }, + { + "epoch": 2.095816286129075, + "grad_norm": 0.02910180203616619, + "learning_rate": 1.441103833759112e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7818864, + "step": 15880 + }, + { + "epoch": 2.0964761779068235, + "grad_norm": 0.022448379546403885, + "learning_rate": 1.4406903306318913e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7821168, + "step": 15885 + }, + { + "epoch": 2.097136069684572, + "grad_norm": 0.026671426370739937, + "learning_rate": 1.440276733971339e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7823920, + "step": 15890 + }, + { + "epoch": 2.09779596146232, + "grad_norm": 0.017572835087776184, + "learning_rate": 1.439863043865238e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7826224, + "step": 15895 + }, + { + "epoch": 2.098455853240069, + "grad_norm": 0.0058591896668076515, + "learning_rate": 1.4394492604013914e-06, + "loss": 0.0, + "num_input_tokens_seen": 7828720, + "step": 15900 + }, + { + "epoch": 2.099115745017817, + "grad_norm": 0.003320800606161356, + "learning_rate": 1.4390353836676217e-06, + "loss": 0.0554, + "num_input_tokens_seen": 7831344, + "step": 15905 + }, + { + "epoch": 2.0997756367955653, + "grad_norm": 0.019994355738162994, + "learning_rate": 1.4386214137517707e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7833840, + "step": 15910 + }, + { + "epoch": 2.100435528573314, + "grad_norm": 0.07288461178541183, + "learning_rate": 1.438207350741701e-06, + "loss": 0.0659, + "num_input_tokens_seen": 7836016, + "step": 15915 + }, + { + "epoch": 2.1010954203510623, + "grad_norm": 0.009932564571499825, + "learning_rate": 1.4377931947252943e-06, + "loss": 0.0295, + "num_input_tokens_seen": 7838768, + "step": 15920 + }, + { + "epoch": 2.101755312128811, + "grad_norm": 0.009432883001863956, + "learning_rate": 1.4373789457904522e-06, + "loss": 0.0581, + "num_input_tokens_seen": 7841328, + "step": 15925 + }, + { + "epoch": 2.1024152039065593, + "grad_norm": 0.013211063109338284, + "learning_rate": 1.4369646040250962e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7843760, + "step": 15930 + }, + { + "epoch": 2.1030750956843076, + "grad_norm": 0.018002096563577652, + "learning_rate": 1.4365501695171673e-06, + "loss": 0.0908, + "num_input_tokens_seen": 7846512, + "step": 15935 + }, + { + "epoch": 2.1037349874620563, + "grad_norm": 0.10871543735265732, + "learning_rate": 1.436135642354626e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7849072, + "step": 15940 + }, + { + "epoch": 2.1043948792398046, + "grad_norm": 0.051549457013607025, + "learning_rate": 1.4357210226254533e-06, + "loss": 0.088, + "num_input_tokens_seen": 7851632, + "step": 15945 + }, + { + "epoch": 2.105054771017553, + "grad_norm": 0.09863109886646271, + "learning_rate": 1.435306310417648e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7854064, + "step": 15950 + }, + { + "epoch": 2.1057146627953016, + "grad_norm": 0.09837587922811508, + "learning_rate": 1.4348915058192316e-06, + "loss": 0.0478, + "num_input_tokens_seen": 7856752, + "step": 15955 + }, + { + "epoch": 2.10637455457305, + "grad_norm": 23.637500762939453, + "learning_rate": 1.4344766089182416e-06, + "loss": 0.0468, + "num_input_tokens_seen": 7859376, + "step": 15960 + }, + { + "epoch": 2.1070344463507986, + "grad_norm": 0.008645118214190006, + "learning_rate": 1.4340616198027377e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7862000, + "step": 15965 + }, + { + "epoch": 2.107694338128547, + "grad_norm": 25.188690185546875, + "learning_rate": 1.4336465385607982e-06, + "loss": 0.0612, + "num_input_tokens_seen": 7864304, + "step": 15970 + }, + { + "epoch": 2.108354229906295, + "grad_norm": 0.2091568261384964, + "learning_rate": 1.433231365280521e-06, + "loss": 0.1067, + "num_input_tokens_seen": 7866608, + "step": 15975 + }, + { + "epoch": 2.109014121684044, + "grad_norm": 0.03102351725101471, + "learning_rate": 1.432816100050024e-06, + "loss": 0.0596, + "num_input_tokens_seen": 7869040, + "step": 15980 + }, + { + "epoch": 2.109674013461792, + "grad_norm": 0.0035080285742878914, + "learning_rate": 1.432400742957444e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7871792, + "step": 15985 + }, + { + "epoch": 2.110333905239541, + "grad_norm": 0.003127987729385495, + "learning_rate": 1.4319852940909377e-06, + "loss": 0.001, + "num_input_tokens_seen": 7874160, + "step": 15990 + }, + { + "epoch": 2.110993797017289, + "grad_norm": 0.023723382502794266, + "learning_rate": 1.4315697535386804e-06, + "loss": 0.0029, + "num_input_tokens_seen": 7876656, + "step": 15995 + }, + { + "epoch": 2.1116536887950375, + "grad_norm": 35.08774948120117, + "learning_rate": 1.4311541213888682e-06, + "loss": 0.0551, + "num_input_tokens_seen": 7879280, + "step": 16000 + }, + { + "epoch": 2.112313580572786, + "grad_norm": 0.024285173043608665, + "learning_rate": 1.430738397729716e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7881712, + "step": 16005 + }, + { + "epoch": 2.1129734723505345, + "grad_norm": 0.08313702791929245, + "learning_rate": 1.4303225826494583e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7883952, + "step": 16010 + }, + { + "epoch": 2.113633364128283, + "grad_norm": 0.058101836591959, + "learning_rate": 1.4299066762363484e-06, + "loss": 0.0488, + "num_input_tokens_seen": 7886320, + "step": 16015 + }, + { + "epoch": 2.1142932559060315, + "grad_norm": 0.0922577753663063, + "learning_rate": 1.4294906785786593e-06, + "loss": 0.0273, + "num_input_tokens_seen": 7889008, + "step": 16020 + }, + { + "epoch": 2.1149531476837797, + "grad_norm": 18.65223503112793, + "learning_rate": 1.429074589764684e-06, + "loss": 0.0751, + "num_input_tokens_seen": 7891376, + "step": 16025 + }, + { + "epoch": 2.1156130394615285, + "grad_norm": 0.26984599232673645, + "learning_rate": 1.4286584098827343e-06, + "loss": 0.0536, + "num_input_tokens_seen": 7893616, + "step": 16030 + }, + { + "epoch": 2.1162729312392767, + "grad_norm": 0.010782040655612946, + "learning_rate": 1.4282421390211411e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7895984, + "step": 16035 + }, + { + "epoch": 2.116932823017025, + "grad_norm": 0.0064732348546385765, + "learning_rate": 1.4278257772682548e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7898224, + "step": 16040 + }, + { + "epoch": 2.1175927147947737, + "grad_norm": 0.007527779787778854, + "learning_rate": 1.4274093247124456e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7900656, + "step": 16045 + }, + { + "epoch": 2.118252606572522, + "grad_norm": 0.0363912433385849, + "learning_rate": 1.4269927814421023e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7903152, + "step": 16050 + }, + { + "epoch": 2.1189124983502707, + "grad_norm": 0.004254709463566542, + "learning_rate": 1.426576147545633e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7905520, + "step": 16055 + }, + { + "epoch": 2.119572390128019, + "grad_norm": 0.03672404587268829, + "learning_rate": 1.4261594231114658e-06, + "loss": 0.0009, + "num_input_tokens_seen": 7907952, + "step": 16060 + }, + { + "epoch": 2.1202322819057673, + "grad_norm": 0.0029204622842371464, + "learning_rate": 1.4257426082280466e-06, + "loss": 0.1114, + "num_input_tokens_seen": 7910384, + "step": 16065 + }, + { + "epoch": 2.120892173683516, + "grad_norm": 0.42951253056526184, + "learning_rate": 1.4253257029838419e-06, + "loss": 0.009, + "num_input_tokens_seen": 7912880, + "step": 16070 + }, + { + "epoch": 2.1215520654612643, + "grad_norm": 101.55767822265625, + "learning_rate": 1.4249087074673367e-06, + "loss": 0.1324, + "num_input_tokens_seen": 7915184, + "step": 16075 + }, + { + "epoch": 2.122211957239013, + "grad_norm": 0.0719396322965622, + "learning_rate": 1.4244916217670352e-06, + "loss": 0.0003, + "num_input_tokens_seen": 7917424, + "step": 16080 + }, + { + "epoch": 2.1228718490167613, + "grad_norm": 0.010440871119499207, + "learning_rate": 1.4240744459714612e-06, + "loss": 0.0015, + "num_input_tokens_seen": 7919600, + "step": 16085 + }, + { + "epoch": 2.1235317407945096, + "grad_norm": 0.004497972317039967, + "learning_rate": 1.4236571801691568e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7922224, + "step": 16090 + }, + { + "epoch": 2.1241916325722583, + "grad_norm": 37.28203582763672, + "learning_rate": 1.4232398244486835e-06, + "loss": 0.0682, + "num_input_tokens_seen": 7924464, + "step": 16095 + }, + { + "epoch": 2.1248515243500066, + "grad_norm": 0.0008809241116978228, + "learning_rate": 1.4228223788986226e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7927088, + "step": 16100 + }, + { + "epoch": 2.125511416127755, + "grad_norm": 0.21736837923526764, + "learning_rate": 1.4224048436075738e-06, + "loss": 0.1128, + "num_input_tokens_seen": 7929648, + "step": 16105 + }, + { + "epoch": 2.1261713079055036, + "grad_norm": 0.008032063953578472, + "learning_rate": 1.4219872186641557e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7932016, + "step": 16110 + }, + { + "epoch": 2.126831199683252, + "grad_norm": 0.014576790854334831, + "learning_rate": 1.421569504157006e-06, + "loss": 0.0535, + "num_input_tokens_seen": 7934576, + "step": 16115 + }, + { + "epoch": 2.1274910914610006, + "grad_norm": 0.037164539098739624, + "learning_rate": 1.4211517001747818e-06, + "loss": 0.0491, + "num_input_tokens_seen": 7936880, + "step": 16120 + }, + { + "epoch": 2.128150983238749, + "grad_norm": 0.09125718474388123, + "learning_rate": 1.420733806806159e-06, + "loss": 0.0007, + "num_input_tokens_seen": 7939248, + "step": 16125 + }, + { + "epoch": 2.128810875016497, + "grad_norm": 0.02172080986201763, + "learning_rate": 1.4203158241398329e-06, + "loss": 0.1307, + "num_input_tokens_seen": 7941936, + "step": 16130 + }, + { + "epoch": 2.129470766794246, + "grad_norm": 0.02687636949121952, + "learning_rate": 1.4198977522645162e-06, + "loss": 0.1063, + "num_input_tokens_seen": 7944304, + "step": 16135 + }, + { + "epoch": 2.130130658571994, + "grad_norm": 19.126834869384766, + "learning_rate": 1.4194795912689426e-06, + "loss": 0.1061, + "num_input_tokens_seen": 7946544, + "step": 16140 + }, + { + "epoch": 2.130790550349743, + "grad_norm": 0.009625761769711971, + "learning_rate": 1.419061341241863e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7949424, + "step": 16145 + }, + { + "epoch": 2.131450442127491, + "grad_norm": 0.004309684503823519, + "learning_rate": 1.4186430022720488e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7951856, + "step": 16150 + }, + { + "epoch": 2.1321103339052394, + "grad_norm": 0.05872798711061478, + "learning_rate": 1.4182245744482886e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7953968, + "step": 16155 + }, + { + "epoch": 2.132770225682988, + "grad_norm": 0.02588357776403427, + "learning_rate": 1.4178060578593912e-06, + "loss": 0.1079, + "num_input_tokens_seen": 7956464, + "step": 16160 + }, + { + "epoch": 2.1334301174607364, + "grad_norm": 0.019361227750778198, + "learning_rate": 1.4173874525941836e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7958896, + "step": 16165 + }, + { + "epoch": 2.1340900092384847, + "grad_norm": 0.11392463743686676, + "learning_rate": 1.4169687587415114e-06, + "loss": 0.0126, + "num_input_tokens_seen": 7961328, + "step": 16170 + }, + { + "epoch": 2.1347499010162334, + "grad_norm": 0.0023702646140009165, + "learning_rate": 1.4165499763902399e-06, + "loss": 0.0691, + "num_input_tokens_seen": 7963888, + "step": 16175 + }, + { + "epoch": 2.1354097927939817, + "grad_norm": 0.014895014464855194, + "learning_rate": 1.416131105629252e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7966512, + "step": 16180 + }, + { + "epoch": 2.1360696845717304, + "grad_norm": 0.05524078384041786, + "learning_rate": 1.4157121465474504e-06, + "loss": 0.0002, + "num_input_tokens_seen": 7968944, + "step": 16185 + }, + { + "epoch": 2.1367295763494787, + "grad_norm": 0.05119001865386963, + "learning_rate": 1.4152930992337562e-06, + "loss": 0.0018, + "num_input_tokens_seen": 7971440, + "step": 16190 + }, + { + "epoch": 2.137389468127227, + "grad_norm": 0.04204750433564186, + "learning_rate": 1.4148739637771088e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7973744, + "step": 16195 + }, + { + "epoch": 2.1380493599049757, + "grad_norm": 0.5487294793128967, + "learning_rate": 1.4144547402664674e-06, + "loss": 0.0523, + "num_input_tokens_seen": 7975920, + "step": 16200 + }, + { + "epoch": 2.138709251682724, + "grad_norm": 0.0028204945847392082, + "learning_rate": 1.4140354287908079e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7978224, + "step": 16205 + }, + { + "epoch": 2.1393691434604727, + "grad_norm": 0.18289142847061157, + "learning_rate": 1.4136160294391272e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7980592, + "step": 16210 + }, + { + "epoch": 2.140029035238221, + "grad_norm": 0.016940122470259666, + "learning_rate": 1.4131965423004394e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7982896, + "step": 16215 + }, + { + "epoch": 2.1406889270159692, + "grad_norm": 0.5715939402580261, + "learning_rate": 1.4127769674637777e-06, + "loss": 0.0297, + "num_input_tokens_seen": 7985072, + "step": 16220 + }, + { + "epoch": 2.141348818793718, + "grad_norm": 0.0023537829983979464, + "learning_rate": 1.4123573050181937e-06, + "loss": 0.1126, + "num_input_tokens_seen": 7987824, + "step": 16225 + }, + { + "epoch": 2.1420087105714662, + "grad_norm": 1.0796823501586914, + "learning_rate": 1.4119375550527578e-06, + "loss": 0.0004, + "num_input_tokens_seen": 7990256, + "step": 16230 + }, + { + "epoch": 2.1426686023492145, + "grad_norm": 0.010677073150873184, + "learning_rate": 1.4115177176565587e-06, + "loss": 0.0001, + "num_input_tokens_seen": 7992944, + "step": 16235 + }, + { + "epoch": 2.1433284941269632, + "grad_norm": 0.002611901145428419, + "learning_rate": 1.4110977929187042e-06, + "loss": 0.0803, + "num_input_tokens_seen": 7995440, + "step": 16240 + }, + { + "epoch": 2.1439883859047115, + "grad_norm": 0.21536973118782043, + "learning_rate": 1.41067778092832e-06, + "loss": 0.0956, + "num_input_tokens_seen": 7997808, + "step": 16245 + }, + { + "epoch": 2.1446482776824602, + "grad_norm": 0.017482072114944458, + "learning_rate": 1.4102576817745506e-06, + "loss": 0.0798, + "num_input_tokens_seen": 8000304, + "step": 16250 + }, + { + "epoch": 2.1453081694602085, + "grad_norm": 0.03651322424411774, + "learning_rate": 1.4098374955465592e-06, + "loss": 0.1273, + "num_input_tokens_seen": 8002992, + "step": 16255 + }, + { + "epoch": 2.145968061237957, + "grad_norm": 0.023659037426114082, + "learning_rate": 1.409417222333527e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8005488, + "step": 16260 + }, + { + "epoch": 2.1466279530157055, + "grad_norm": 0.009940829128026962, + "learning_rate": 1.4089968622246543e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8008240, + "step": 16265 + }, + { + "epoch": 2.147287844793454, + "grad_norm": 0.062029287219047546, + "learning_rate": 1.4085764153091595e-06, + "loss": 0.1245, + "num_input_tokens_seen": 8010864, + "step": 16270 + }, + { + "epoch": 2.1479477365712025, + "grad_norm": 0.09726342558860779, + "learning_rate": 1.4081558816762788e-06, + "loss": 0.0894, + "num_input_tokens_seen": 8013296, + "step": 16275 + }, + { + "epoch": 2.148607628348951, + "grad_norm": 0.04445560276508331, + "learning_rate": 1.4077352614152683e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8016048, + "step": 16280 + }, + { + "epoch": 2.149267520126699, + "grad_norm": 0.030557597056031227, + "learning_rate": 1.407314554615401e-06, + "loss": 0.0012, + "num_input_tokens_seen": 8018480, + "step": 16285 + }, + { + "epoch": 2.149927411904448, + "grad_norm": 0.13699224591255188, + "learning_rate": 1.406893761365969e-06, + "loss": 0.0413, + "num_input_tokens_seen": 8021040, + "step": 16290 + }, + { + "epoch": 2.150587303682196, + "grad_norm": 0.1511172652244568, + "learning_rate": 1.4064728817562825e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8023664, + "step": 16295 + }, + { + "epoch": 2.1512471954599444, + "grad_norm": 0.06514198333024979, + "learning_rate": 1.4060519158756702e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8025712, + "step": 16300 + }, + { + "epoch": 2.151907087237693, + "grad_norm": 0.018014973029494286, + "learning_rate": 1.4056308638134794e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8028208, + "step": 16305 + }, + { + "epoch": 2.1525669790154414, + "grad_norm": 0.19428548216819763, + "learning_rate": 1.4052097256590752e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8030640, + "step": 16310 + }, + { + "epoch": 2.15322687079319, + "grad_norm": 0.05569561943411827, + "learning_rate": 1.4047885015018407e-06, + "loss": 0.0613, + "num_input_tokens_seen": 8033136, + "step": 16315 + }, + { + "epoch": 2.1538867625709384, + "grad_norm": 0.44556447863578796, + "learning_rate": 1.4043671914311785e-06, + "loss": 0.054, + "num_input_tokens_seen": 8035696, + "step": 16320 + }, + { + "epoch": 2.1545466543486866, + "grad_norm": 0.011591610498726368, + "learning_rate": 1.4039457955365077e-06, + "loss": 0.0348, + "num_input_tokens_seen": 8038448, + "step": 16325 + }, + { + "epoch": 2.1552065461264354, + "grad_norm": 0.05333820357918739, + "learning_rate": 1.403524313907267e-06, + "loss": 0.1972, + "num_input_tokens_seen": 8040944, + "step": 16330 + }, + { + "epoch": 2.1558664379041836, + "grad_norm": 0.01803092285990715, + "learning_rate": 1.403102746632913e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8043312, + "step": 16335 + }, + { + "epoch": 2.1565263296819324, + "grad_norm": 0.07055540382862091, + "learning_rate": 1.4026810938029197e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8045872, + "step": 16340 + }, + { + "epoch": 2.1571862214596806, + "grad_norm": 22.134244918823242, + "learning_rate": 1.4022593555067804e-06, + "loss": 0.0644, + "num_input_tokens_seen": 8048624, + "step": 16345 + }, + { + "epoch": 2.157846113237429, + "grad_norm": 0.012910672463476658, + "learning_rate": 1.401837531834006e-06, + "loss": 0.1225, + "num_input_tokens_seen": 8050864, + "step": 16350 + }, + { + "epoch": 2.1585060050151776, + "grad_norm": 0.1114889532327652, + "learning_rate": 1.401415622874125e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8053616, + "step": 16355 + }, + { + "epoch": 2.159165896792926, + "grad_norm": 0.013317722827196121, + "learning_rate": 1.400993628716685e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8056048, + "step": 16360 + }, + { + "epoch": 2.159825788570674, + "grad_norm": 0.010530170984566212, + "learning_rate": 1.400571549451251e-06, + "loss": 0.0399, + "num_input_tokens_seen": 8058288, + "step": 16365 + }, + { + "epoch": 2.160485680348423, + "grad_norm": 0.020479438826441765, + "learning_rate": 1.4001493851674066e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8060592, + "step": 16370 + }, + { + "epoch": 2.161145572126171, + "grad_norm": 0.02857316844165325, + "learning_rate": 1.3997271359547529e-06, + "loss": 0.0229, + "num_input_tokens_seen": 8062960, + "step": 16375 + }, + { + "epoch": 2.16180546390392, + "grad_norm": 0.04585658758878708, + "learning_rate": 1.3993048019029088e-06, + "loss": 0.0017, + "num_input_tokens_seen": 8065584, + "step": 16380 + }, + { + "epoch": 2.162465355681668, + "grad_norm": 0.01887671649456024, + "learning_rate": 1.3988823831015125e-06, + "loss": 0.0704, + "num_input_tokens_seen": 8068144, + "step": 16385 + }, + { + "epoch": 2.1631252474594165, + "grad_norm": 0.04641466215252876, + "learning_rate": 1.3984598796402183e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8070384, + "step": 16390 + }, + { + "epoch": 2.163785139237165, + "grad_norm": 0.010708236135542393, + "learning_rate": 1.3980372916087006e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8072816, + "step": 16395 + }, + { + "epoch": 2.1644450310149135, + "grad_norm": 0.1834602952003479, + "learning_rate": 1.3976146190966498e-06, + "loss": 0.0014, + "num_input_tokens_seen": 8075184, + "step": 16400 + }, + { + "epoch": 2.165104922792662, + "grad_norm": 0.07067617774009705, + "learning_rate": 1.3971918621937756e-06, + "loss": 0.0581, + "num_input_tokens_seen": 8077424, + "step": 16405 + }, + { + "epoch": 2.1657648145704105, + "grad_norm": 0.02065001055598259, + "learning_rate": 1.3967690209898046e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8080048, + "step": 16410 + }, + { + "epoch": 2.1664247063481588, + "grad_norm": 0.2887703776359558, + "learning_rate": 1.3963460955744824e-06, + "loss": 0.0478, + "num_input_tokens_seen": 8082416, + "step": 16415 + }, + { + "epoch": 2.1670845981259075, + "grad_norm": 0.024521052837371826, + "learning_rate": 1.3959230860375716e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8085104, + "step": 16420 + }, + { + "epoch": 2.1677444899036558, + "grad_norm": 0.3461296856403351, + "learning_rate": 1.3954999924688522e-06, + "loss": 0.1084, + "num_input_tokens_seen": 8087408, + "step": 16425 + }, + { + "epoch": 2.1684043816814045, + "grad_norm": 0.10695258527994156, + "learning_rate": 1.395076814958124e-06, + "loss": 0.0489, + "num_input_tokens_seen": 8089456, + "step": 16430 + }, + { + "epoch": 2.1690642734591528, + "grad_norm": 0.026660706847906113, + "learning_rate": 1.3946535535952024e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8092336, + "step": 16435 + }, + { + "epoch": 2.169724165236901, + "grad_norm": 14.826727867126465, + "learning_rate": 1.394230208469922e-06, + "loss": 0.0369, + "num_input_tokens_seen": 8094640, + "step": 16440 + }, + { + "epoch": 2.1703840570146498, + "grad_norm": 24.21205711364746, + "learning_rate": 1.3938067796721349e-06, + "loss": 0.0711, + "num_input_tokens_seen": 8097072, + "step": 16445 + }, + { + "epoch": 2.171043948792398, + "grad_norm": 0.30090898275375366, + "learning_rate": 1.3933832672917101e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8099504, + "step": 16450 + }, + { + "epoch": 2.1717038405701463, + "grad_norm": 0.12428780645132065, + "learning_rate": 1.3929596714185357e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8101680, + "step": 16455 + }, + { + "epoch": 2.172363732347895, + "grad_norm": 1.8694030046463013, + "learning_rate": 1.3925359921425166e-06, + "loss": 0.1802, + "num_input_tokens_seen": 8104304, + "step": 16460 + }, + { + "epoch": 2.1730236241256433, + "grad_norm": 0.07084174454212189, + "learning_rate": 1.3921122295535756e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8106672, + "step": 16465 + }, + { + "epoch": 2.173683515903392, + "grad_norm": 0.5130443572998047, + "learning_rate": 1.3916883837416536e-06, + "loss": 0.091, + "num_input_tokens_seen": 8108976, + "step": 16470 + }, + { + "epoch": 2.1743434076811403, + "grad_norm": 0.029886798933148384, + "learning_rate": 1.3912644547967085e-06, + "loss": 0.004, + "num_input_tokens_seen": 8111408, + "step": 16475 + }, + { + "epoch": 2.1750032994588886, + "grad_norm": 0.023892467841506004, + "learning_rate": 1.390840442808716e-06, + "loss": 0.0458, + "num_input_tokens_seen": 8113904, + "step": 16480 + }, + { + "epoch": 2.1756631912366373, + "grad_norm": 0.04723944514989853, + "learning_rate": 1.3904163478676698e-06, + "loss": 0.1022, + "num_input_tokens_seen": 8116336, + "step": 16485 + }, + { + "epoch": 2.1763230830143856, + "grad_norm": 0.05369102954864502, + "learning_rate": 1.3899921700635808e-06, + "loss": 0.0318, + "num_input_tokens_seen": 8118640, + "step": 16490 + }, + { + "epoch": 2.176982974792134, + "grad_norm": 84.5366439819336, + "learning_rate": 1.389567909486478e-06, + "loss": 0.0517, + "num_input_tokens_seen": 8121008, + "step": 16495 + }, + { + "epoch": 2.1776428665698826, + "grad_norm": 15.742450714111328, + "learning_rate": 1.3891435662264077e-06, + "loss": 0.065, + "num_input_tokens_seen": 8123632, + "step": 16500 + }, + { + "epoch": 2.178302758347631, + "grad_norm": 0.06588709354400635, + "learning_rate": 1.3887191403734328e-06, + "loss": 0.0667, + "num_input_tokens_seen": 8126256, + "step": 16505 + }, + { + "epoch": 2.1789626501253796, + "grad_norm": 0.011470166966319084, + "learning_rate": 1.3882946320176358e-06, + "loss": 0.0536, + "num_input_tokens_seen": 8129072, + "step": 16510 + }, + { + "epoch": 2.179622541903128, + "grad_norm": 12.762123107910156, + "learning_rate": 1.3878700412491147e-06, + "loss": 0.0269, + "num_input_tokens_seen": 8131632, + "step": 16515 + }, + { + "epoch": 2.180282433680876, + "grad_norm": 0.002766258781775832, + "learning_rate": 1.3874453681579861e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8134192, + "step": 16520 + }, + { + "epoch": 2.180942325458625, + "grad_norm": 0.00879225879907608, + "learning_rate": 1.3870206128343838e-06, + "loss": 0.0014, + "num_input_tokens_seen": 8136432, + "step": 16525 + }, + { + "epoch": 2.181602217236373, + "grad_norm": 0.005440095905214548, + "learning_rate": 1.386595775368459e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8138544, + "step": 16530 + }, + { + "epoch": 2.182262109014122, + "grad_norm": 12.968379020690918, + "learning_rate": 1.3861708558503804e-06, + "loss": 0.0551, + "num_input_tokens_seen": 8140976, + "step": 16535 + }, + { + "epoch": 2.18292200079187, + "grad_norm": 0.006056667771190405, + "learning_rate": 1.385745854370334e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8143344, + "step": 16540 + }, + { + "epoch": 2.1835818925696184, + "grad_norm": 0.05108082666993141, + "learning_rate": 1.3853207710185233e-06, + "loss": 0.0352, + "num_input_tokens_seen": 8145392, + "step": 16545 + }, + { + "epoch": 2.184241784347367, + "grad_norm": 0.06454239040613174, + "learning_rate": 1.3848956058851695e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8148080, + "step": 16550 + }, + { + "epoch": 2.1849016761251154, + "grad_norm": 24.318828582763672, + "learning_rate": 1.3844703590605105e-06, + "loss": 0.0695, + "num_input_tokens_seen": 8150448, + "step": 16555 + }, + { + "epoch": 2.185561567902864, + "grad_norm": 0.02337341383099556, + "learning_rate": 1.3840450306348017e-06, + "loss": 0.109, + "num_input_tokens_seen": 8152880, + "step": 16560 + }, + { + "epoch": 2.1862214596806124, + "grad_norm": 0.4075833261013031, + "learning_rate": 1.3836196206983162e-06, + "loss": 0.0972, + "num_input_tokens_seen": 8155248, + "step": 16565 + }, + { + "epoch": 2.1868813514583607, + "grad_norm": 0.1106967106461525, + "learning_rate": 1.3831941293413443e-06, + "loss": 0.0592, + "num_input_tokens_seen": 8157808, + "step": 16570 + }, + { + "epoch": 2.1875412432361094, + "grad_norm": 1.1730084419250488, + "learning_rate": 1.3827685566541934e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8160368, + "step": 16575 + }, + { + "epoch": 2.1882011350138577, + "grad_norm": 0.01301812008023262, + "learning_rate": 1.382342902727188e-06, + "loss": 0.0492, + "num_input_tokens_seen": 8162544, + "step": 16580 + }, + { + "epoch": 2.188861026791606, + "grad_norm": 0.295091837644577, + "learning_rate": 1.38191716765067e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8165168, + "step": 16585 + }, + { + "epoch": 2.1895209185693547, + "grad_norm": 0.033473141491413116, + "learning_rate": 1.381491351514999e-06, + "loss": 0.0538, + "num_input_tokens_seen": 8167472, + "step": 16590 + }, + { + "epoch": 2.190180810347103, + "grad_norm": 0.18719351291656494, + "learning_rate": 1.3810654544105512e-06, + "loss": 0.0013, + "num_input_tokens_seen": 8169840, + "step": 16595 + }, + { + "epoch": 2.1908407021248517, + "grad_norm": 0.01855543442070484, + "learning_rate": 1.38063947642772e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8172272, + "step": 16600 + }, + { + "epoch": 2.1915005939026, + "grad_norm": 0.2745734453201294, + "learning_rate": 1.3802134176569166e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8174640, + "step": 16605 + }, + { + "epoch": 2.1921604856803483, + "grad_norm": 0.03164679929614067, + "learning_rate": 1.3797872781885685e-06, + "loss": 0.0298, + "num_input_tokens_seen": 8177264, + "step": 16610 + }, + { + "epoch": 2.192820377458097, + "grad_norm": 0.028849711641669273, + "learning_rate": 1.3793610581131207e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8179504, + "step": 16615 + }, + { + "epoch": 2.1934802692358453, + "grad_norm": 0.004540000110864639, + "learning_rate": 1.3789347575210352e-06, + "loss": 0.0782, + "num_input_tokens_seen": 8182192, + "step": 16620 + }, + { + "epoch": 2.1941401610135935, + "grad_norm": 0.004751342348754406, + "learning_rate": 1.3785083765027919e-06, + "loss": 0.0031, + "num_input_tokens_seen": 8184496, + "step": 16625 + }, + { + "epoch": 2.1948000527913423, + "grad_norm": 0.06377559155225754, + "learning_rate": 1.3780819151488865e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8186864, + "step": 16630 + }, + { + "epoch": 2.1954599445690905, + "grad_norm": 0.013530077412724495, + "learning_rate": 1.3776553735498321e-06, + "loss": 0.2403, + "num_input_tokens_seen": 8189168, + "step": 16635 + }, + { + "epoch": 2.1961198363468393, + "grad_norm": 12.607081413269043, + "learning_rate": 1.37722875179616e-06, + "loss": 0.0315, + "num_input_tokens_seen": 8191536, + "step": 16640 + }, + { + "epoch": 2.1967797281245875, + "grad_norm": 0.0664307028055191, + "learning_rate": 1.3768020499784165e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8194352, + "step": 16645 + }, + { + "epoch": 2.197439619902336, + "grad_norm": 0.12778986990451813, + "learning_rate": 1.3763752681871669e-06, + "loss": 0.0444, + "num_input_tokens_seen": 8196784, + "step": 16650 + }, + { + "epoch": 2.1980995116800846, + "grad_norm": 0.8269379734992981, + "learning_rate": 1.375948406512992e-06, + "loss": 0.0195, + "num_input_tokens_seen": 8199216, + "step": 16655 + }, + { + "epoch": 2.198759403457833, + "grad_norm": 0.008877968415617943, + "learning_rate": 1.3755214650464903e-06, + "loss": 0.071, + "num_input_tokens_seen": 8201456, + "step": 16660 + }, + { + "epoch": 2.1994192952355816, + "grad_norm": 0.0728282779455185, + "learning_rate": 1.3750944438782769e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8203568, + "step": 16665 + }, + { + "epoch": 2.20007918701333, + "grad_norm": 0.18286637961864471, + "learning_rate": 1.374667343098984e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8205872, + "step": 16670 + }, + { + "epoch": 2.200739078791078, + "grad_norm": 0.011909585446119308, + "learning_rate": 1.3742401627992604e-06, + "loss": 0.0201, + "num_input_tokens_seen": 8208432, + "step": 16675 + }, + { + "epoch": 2.201398970568827, + "grad_norm": 15.967900276184082, + "learning_rate": 1.3738129030697724e-06, + "loss": 0.2684, + "num_input_tokens_seen": 8210928, + "step": 16680 + }, + { + "epoch": 2.202058862346575, + "grad_norm": 0.08457144349813461, + "learning_rate": 1.3733855640012028e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8213168, + "step": 16685 + }, + { + "epoch": 2.202718754124324, + "grad_norm": 0.2429049015045166, + "learning_rate": 1.372958145684251e-06, + "loss": 0.1604, + "num_input_tokens_seen": 8215536, + "step": 16690 + }, + { + "epoch": 2.203378645902072, + "grad_norm": 2.9228296279907227, + "learning_rate": 1.3725306482096337e-06, + "loss": 0.0022, + "num_input_tokens_seen": 8217904, + "step": 16695 + }, + { + "epoch": 2.2040385376798204, + "grad_norm": 0.39163509011268616, + "learning_rate": 1.3721030716680835e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8220208, + "step": 16700 + }, + { + "epoch": 2.204698429457569, + "grad_norm": 0.05875394865870476, + "learning_rate": 1.3716754161503514e-06, + "loss": 0.0322, + "num_input_tokens_seen": 8222832, + "step": 16705 + }, + { + "epoch": 2.2053583212353174, + "grad_norm": 0.012448856607079506, + "learning_rate": 1.3712476817472037e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8225264, + "step": 16710 + }, + { + "epoch": 2.2060182130130657, + "grad_norm": 0.04935499653220177, + "learning_rate": 1.3708198685494234e-06, + "loss": 0.1097, + "num_input_tokens_seen": 8227632, + "step": 16715 + }, + { + "epoch": 2.2066781047908144, + "grad_norm": 0.1539801061153412, + "learning_rate": 1.3703919766478116e-06, + "loss": 0.0618, + "num_input_tokens_seen": 8230448, + "step": 16720 + }, + { + "epoch": 2.2073379965685627, + "grad_norm": 0.03219306468963623, + "learning_rate": 1.369964006133185e-06, + "loss": 0.1022, + "num_input_tokens_seen": 8233008, + "step": 16725 + }, + { + "epoch": 2.2079978883463114, + "grad_norm": 1.6212204694747925, + "learning_rate": 1.3695359570963772e-06, + "loss": 0.0281, + "num_input_tokens_seen": 8235568, + "step": 16730 + }, + { + "epoch": 2.2086577801240597, + "grad_norm": 0.15697598457336426, + "learning_rate": 1.3691078296282383e-06, + "loss": 0.0354, + "num_input_tokens_seen": 8237744, + "step": 16735 + }, + { + "epoch": 2.209317671901808, + "grad_norm": 0.5345177054405212, + "learning_rate": 1.3686796238196357e-06, + "loss": 0.0026, + "num_input_tokens_seen": 8240368, + "step": 16740 + }, + { + "epoch": 2.2099775636795567, + "grad_norm": 34.04423904418945, + "learning_rate": 1.3682513397614522e-06, + "loss": 0.127, + "num_input_tokens_seen": 8242800, + "step": 16745 + }, + { + "epoch": 2.210637455457305, + "grad_norm": 0.002000207779929042, + "learning_rate": 1.367822977544589e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8245232, + "step": 16750 + }, + { + "epoch": 2.2112973472350532, + "grad_norm": 0.17773684859275818, + "learning_rate": 1.3673945372599623e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8247856, + "step": 16755 + }, + { + "epoch": 2.211957239012802, + "grad_norm": 20.52808380126953, + "learning_rate": 1.366966018998505e-06, + "loss": 0.0835, + "num_input_tokens_seen": 8250352, + "step": 16760 + }, + { + "epoch": 2.2126171307905502, + "grad_norm": 0.12653538584709167, + "learning_rate": 1.3665374228511681e-06, + "loss": 0.0053, + "num_input_tokens_seen": 8252720, + "step": 16765 + }, + { + "epoch": 2.213277022568299, + "grad_norm": 0.15404640138149261, + "learning_rate": 1.366108748908917e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8255344, + "step": 16770 + }, + { + "epoch": 2.2139369143460472, + "grad_norm": 0.014432862401008606, + "learning_rate": 1.3656799972627355e-06, + "loss": 0.0782, + "num_input_tokens_seen": 8257648, + "step": 16775 + }, + { + "epoch": 2.2145968061237955, + "grad_norm": 0.0697983056306839, + "learning_rate": 1.3652511680036227e-06, + "loss": 0.0472, + "num_input_tokens_seen": 8260336, + "step": 16780 + }, + { + "epoch": 2.2152566979015442, + "grad_norm": 0.05055699869990349, + "learning_rate": 1.3648222612225941e-06, + "loss": 0.0985, + "num_input_tokens_seen": 8263152, + "step": 16785 + }, + { + "epoch": 2.2159165896792925, + "grad_norm": 0.0167390163987875, + "learning_rate": 1.3643932770106824e-06, + "loss": 0.0595, + "num_input_tokens_seen": 8265584, + "step": 16790 + }, + { + "epoch": 2.2165764814570412, + "grad_norm": 0.0399298258125782, + "learning_rate": 1.3639642154589365e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8267760, + "step": 16795 + }, + { + "epoch": 2.2172363732347895, + "grad_norm": 0.00978358555585146, + "learning_rate": 1.3635350766584217e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8270256, + "step": 16800 + }, + { + "epoch": 2.217896265012538, + "grad_norm": 0.005867898464202881, + "learning_rate": 1.363105860700219e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8273072, + "step": 16805 + }, + { + "epoch": 2.2185561567902865, + "grad_norm": 0.6499746441841125, + "learning_rate": 1.3626765676754274e-06, + "loss": 0.194, + "num_input_tokens_seen": 8275376, + "step": 16810 + }, + { + "epoch": 2.219216048568035, + "grad_norm": 0.03754541650414467, + "learning_rate": 1.3622471976751599e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8277872, + "step": 16815 + }, + { + "epoch": 2.2198759403457835, + "grad_norm": 0.4709387719631195, + "learning_rate": 1.3618177507905484e-06, + "loss": 0.0477, + "num_input_tokens_seen": 8280432, + "step": 16820 + }, + { + "epoch": 2.220535832123532, + "grad_norm": 0.026431599631905556, + "learning_rate": 1.361388227112739e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8282864, + "step": 16825 + }, + { + "epoch": 2.22119572390128, + "grad_norm": 0.0019147369312122464, + "learning_rate": 1.3609586267328955e-06, + "loss": 0.0597, + "num_input_tokens_seen": 8285360, + "step": 16830 + }, + { + "epoch": 2.221855615679029, + "grad_norm": 0.01568089984357357, + "learning_rate": 1.3605289497421974e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8287728, + "step": 16835 + }, + { + "epoch": 2.222515507456777, + "grad_norm": 23.796926498413086, + "learning_rate": 1.3600991962318403e-06, + "loss": 0.1485, + "num_input_tokens_seen": 8290288, + "step": 16840 + }, + { + "epoch": 2.2231753992345253, + "grad_norm": 0.047032494097948074, + "learning_rate": 1.3596693662930365e-06, + "loss": 0.0736, + "num_input_tokens_seen": 8292720, + "step": 16845 + }, + { + "epoch": 2.223835291012274, + "grad_norm": 0.1415061354637146, + "learning_rate": 1.3592394600170142e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8295280, + "step": 16850 + }, + { + "epoch": 2.2244951827900223, + "grad_norm": 0.022507498040795326, + "learning_rate": 1.3588094774950181e-06, + "loss": 0.0475, + "num_input_tokens_seen": 8297648, + "step": 16855 + }, + { + "epoch": 2.225155074567771, + "grad_norm": 0.02734128013253212, + "learning_rate": 1.3583794188183087e-06, + "loss": 0.0073, + "num_input_tokens_seen": 8300016, + "step": 16860 + }, + { + "epoch": 2.2258149663455193, + "grad_norm": 1.2373629808425903, + "learning_rate": 1.3579492840781625e-06, + "loss": 0.0011, + "num_input_tokens_seen": 8302512, + "step": 16865 + }, + { + "epoch": 2.2264748581232676, + "grad_norm": 0.1843707114458084, + "learning_rate": 1.357519073365873e-06, + "loss": 0.1018, + "num_input_tokens_seen": 8305136, + "step": 16870 + }, + { + "epoch": 2.2271347499010163, + "grad_norm": 0.016257228329777718, + "learning_rate": 1.357088786772749e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8307696, + "step": 16875 + }, + { + "epoch": 2.2277946416787646, + "grad_norm": 0.0035908890422433615, + "learning_rate": 1.3566584243901163e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8310000, + "step": 16880 + }, + { + "epoch": 2.228454533456513, + "grad_norm": 0.022007431834936142, + "learning_rate": 1.3562279863093154e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8312304, + "step": 16885 + }, + { + "epoch": 2.2291144252342616, + "grad_norm": 0.006274157669395208, + "learning_rate": 1.3557974726217041e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8314672, + "step": 16890 + }, + { + "epoch": 2.22977431701201, + "grad_norm": 0.01680285483598709, + "learning_rate": 1.3553668834186556e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8317168, + "step": 16895 + }, + { + "epoch": 2.2304342087897586, + "grad_norm": 0.0044866399839520454, + "learning_rate": 1.3549362187915593e-06, + "loss": 0.0642, + "num_input_tokens_seen": 8319792, + "step": 16900 + }, + { + "epoch": 2.231094100567507, + "grad_norm": 0.11140000075101852, + "learning_rate": 1.3545054788318212e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8322352, + "step": 16905 + }, + { + "epoch": 2.231753992345255, + "grad_norm": 26.079273223876953, + "learning_rate": 1.3540746636308623e-06, + "loss": 0.1334, + "num_input_tokens_seen": 8324848, + "step": 16910 + }, + { + "epoch": 2.232413884123004, + "grad_norm": 0.0065078651532530785, + "learning_rate": 1.3536437732801198e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8327088, + "step": 16915 + }, + { + "epoch": 2.233073775900752, + "grad_norm": 0.6009857058525085, + "learning_rate": 1.3532128078710474e-06, + "loss": 0.0014, + "num_input_tokens_seen": 8329712, + "step": 16920 + }, + { + "epoch": 2.233733667678501, + "grad_norm": 0.03584284335374832, + "learning_rate": 1.3527817674951143e-06, + "loss": 0.0088, + "num_input_tokens_seen": 8332336, + "step": 16925 + }, + { + "epoch": 2.234393559456249, + "grad_norm": 0.07889010012149811, + "learning_rate": 1.3523506522438056e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8334640, + "step": 16930 + }, + { + "epoch": 2.2350534512339975, + "grad_norm": 14.214226722717285, + "learning_rate": 1.3519194622086227e-06, + "loss": 0.0535, + "num_input_tokens_seen": 8337072, + "step": 16935 + }, + { + "epoch": 2.235713343011746, + "grad_norm": 66.53902435302734, + "learning_rate": 1.3514881974810823e-06, + "loss": 0.0806, + "num_input_tokens_seen": 8339376, + "step": 16940 + }, + { + "epoch": 2.2363732347894945, + "grad_norm": 0.24029788374900818, + "learning_rate": 1.3510568581527171e-06, + "loss": 0.1198, + "num_input_tokens_seen": 8341616, + "step": 16945 + }, + { + "epoch": 2.237033126567243, + "grad_norm": 0.02471575327217579, + "learning_rate": 1.3506254443150761e-06, + "loss": 0.0794, + "num_input_tokens_seen": 8344176, + "step": 16950 + }, + { + "epoch": 2.2376930183449915, + "grad_norm": 0.006826276425272226, + "learning_rate": 1.3501939560597233e-06, + "loss": 0.0985, + "num_input_tokens_seen": 8346608, + "step": 16955 + }, + { + "epoch": 2.2383529101227397, + "grad_norm": 0.03343689441680908, + "learning_rate": 1.3497623934782397e-06, + "loss": 0.0123, + "num_input_tokens_seen": 8349424, + "step": 16960 + }, + { + "epoch": 2.2390128019004885, + "grad_norm": 0.22107385098934174, + "learning_rate": 1.3493307566622204e-06, + "loss": 0.0019, + "num_input_tokens_seen": 8351728, + "step": 16965 + }, + { + "epoch": 2.2396726936782367, + "grad_norm": 6.898412704467773, + "learning_rate": 1.3488990457032778e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8354160, + "step": 16970 + }, + { + "epoch": 2.240332585455985, + "grad_norm": 0.013253572396934032, + "learning_rate": 1.3484672606930393e-06, + "loss": 0.0096, + "num_input_tokens_seen": 8356272, + "step": 16975 + }, + { + "epoch": 2.2409924772337337, + "grad_norm": 0.018749356269836426, + "learning_rate": 1.3480354017231483e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8358576, + "step": 16980 + }, + { + "epoch": 2.241652369011482, + "grad_norm": 0.25545141100883484, + "learning_rate": 1.3476034688852633e-06, + "loss": 0.0025, + "num_input_tokens_seen": 8361008, + "step": 16985 + }, + { + "epoch": 2.2423122607892307, + "grad_norm": 0.03576225787401199, + "learning_rate": 1.3471714622710595e-06, + "loss": 0.056, + "num_input_tokens_seen": 8363504, + "step": 16990 + }, + { + "epoch": 2.242972152566979, + "grad_norm": 0.005058703012764454, + "learning_rate": 1.3467393819722265e-06, + "loss": 0.0, + "num_input_tokens_seen": 8365680, + "step": 16995 + }, + { + "epoch": 2.2436320443447273, + "grad_norm": 0.02662578783929348, + "learning_rate": 1.3463072280804708e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8368432, + "step": 17000 + }, + { + "epoch": 2.244291936122476, + "grad_norm": 0.00669122114777565, + "learning_rate": 1.3458750006875134e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8370736, + "step": 17005 + }, + { + "epoch": 2.2449518279002243, + "grad_norm": 0.0020471608731895685, + "learning_rate": 1.3454426998850919e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8373488, + "step": 17010 + }, + { + "epoch": 2.245611719677973, + "grad_norm": 0.02016337215900421, + "learning_rate": 1.345010325764959e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8375984, + "step": 17015 + }, + { + "epoch": 2.2462716114557213, + "grad_norm": 0.015209107659757137, + "learning_rate": 1.3445778784188828e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8378480, + "step": 17020 + }, + { + "epoch": 2.2469315032334696, + "grad_norm": 0.284445583820343, + "learning_rate": 1.3441453579386468e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8380656, + "step": 17025 + }, + { + "epoch": 2.2475913950112183, + "grad_norm": 43.588706970214844, + "learning_rate": 1.343712764416051e-06, + "loss": 0.0447, + "num_input_tokens_seen": 8383408, + "step": 17030 + }, + { + "epoch": 2.2482512867889666, + "grad_norm": 0.0007529565482400358, + "learning_rate": 1.3432800979429097e-06, + "loss": 0.1017, + "num_input_tokens_seen": 8385904, + "step": 17035 + }, + { + "epoch": 2.248911178566715, + "grad_norm": 0.32451948523521423, + "learning_rate": 1.3428473586110537e-06, + "loss": 0.0847, + "num_input_tokens_seen": 8388400, + "step": 17040 + }, + { + "epoch": 2.2495710703444636, + "grad_norm": 0.013873131014406681, + "learning_rate": 1.3424145465123286e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8390640, + "step": 17045 + }, + { + "epoch": 2.250230962122212, + "grad_norm": 0.0216103233397007, + "learning_rate": 1.3419816617385953e-06, + "loss": 0.0, + "num_input_tokens_seen": 8393200, + "step": 17050 + }, + { + "epoch": 2.2508908538999606, + "grad_norm": 0.006166580133140087, + "learning_rate": 1.3415487043817311e-06, + "loss": 0.0556, + "num_input_tokens_seen": 8395632, + "step": 17055 + }, + { + "epoch": 2.2508908538999606, + "eval_loss": 0.1500292271375656, + "eval_runtime": 7.8286, + "eval_samples_per_second": 860.312, + "eval_steps_per_second": 107.555, + "num_input_tokens_seen": 8395632, + "step": 17055 + }, + { + "epoch": 2.251550745677709, + "grad_norm": 0.015901878476142883, + "learning_rate": 1.3411156745336272e-06, + "loss": 0.0, + "num_input_tokens_seen": 8397872, + "step": 17060 + }, + { + "epoch": 2.252210637455457, + "grad_norm": 0.4960050880908966, + "learning_rate": 1.3406825722861921e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8400432, + "step": 17065 + }, + { + "epoch": 2.252870529233206, + "grad_norm": 0.009557882323861122, + "learning_rate": 1.3402493977313476e-06, + "loss": 0.0627, + "num_input_tokens_seen": 8402608, + "step": 17070 + }, + { + "epoch": 2.253530421010954, + "grad_norm": 0.17954318225383759, + "learning_rate": 1.3398161509610324e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8404848, + "step": 17075 + }, + { + "epoch": 2.254190312788703, + "grad_norm": 0.003647751174867153, + "learning_rate": 1.3393828320672e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8407216, + "step": 17080 + }, + { + "epoch": 2.254850204566451, + "grad_norm": 0.09699433296918869, + "learning_rate": 1.3389494411418192e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8409648, + "step": 17085 + }, + { + "epoch": 2.2555100963441994, + "grad_norm": 0.026855552569031715, + "learning_rate": 1.3385159782768738e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8412016, + "step": 17090 + }, + { + "epoch": 2.256169988121948, + "grad_norm": 0.015472883358597755, + "learning_rate": 1.3380824435643633e-06, + "loss": 0.0763, + "num_input_tokens_seen": 8414448, + "step": 17095 + }, + { + "epoch": 2.2568298798996964, + "grad_norm": 0.0023638952989131212, + "learning_rate": 1.3376488370963027e-06, + "loss": 0.1161, + "num_input_tokens_seen": 8416752, + "step": 17100 + }, + { + "epoch": 2.257489771677445, + "grad_norm": 0.0017636867705732584, + "learning_rate": 1.3372151589647212e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8419120, + "step": 17105 + }, + { + "epoch": 2.2581496634551934, + "grad_norm": 28.924041748046875, + "learning_rate": 1.3367814092616642e-06, + "loss": 0.0876, + "num_input_tokens_seen": 8421296, + "step": 17110 + }, + { + "epoch": 2.2588095552329417, + "grad_norm": 10.672141075134277, + "learning_rate": 1.336347588079192e-06, + "loss": 0.0389, + "num_input_tokens_seen": 8423536, + "step": 17115 + }, + { + "epoch": 2.2594694470106904, + "grad_norm": 0.0031343603041023016, + "learning_rate": 1.3359136955093798e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8426096, + "step": 17120 + }, + { + "epoch": 2.2601293387884387, + "grad_norm": 0.03127521276473999, + "learning_rate": 1.335479731644318e-06, + "loss": 0.0383, + "num_input_tokens_seen": 8428464, + "step": 17125 + }, + { + "epoch": 2.260789230566187, + "grad_norm": 0.004107305780053139, + "learning_rate": 1.3350456965761127e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8431088, + "step": 17130 + }, + { + "epoch": 2.2614491223439357, + "grad_norm": 0.032716382294893265, + "learning_rate": 1.3346115903968845e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8433328, + "step": 17135 + }, + { + "epoch": 2.262109014121684, + "grad_norm": 0.14623123407363892, + "learning_rate": 1.3341774131987694e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8435760, + "step": 17140 + }, + { + "epoch": 2.2627689058994322, + "grad_norm": 2.8802661895751953, + "learning_rate": 1.333743165073918e-06, + "loss": 0.0319, + "num_input_tokens_seen": 8437936, + "step": 17145 + }, + { + "epoch": 2.263428797677181, + "grad_norm": 0.004641890060156584, + "learning_rate": 1.3333088461144967e-06, + "loss": 0.0011, + "num_input_tokens_seen": 8440496, + "step": 17150 + }, + { + "epoch": 2.2640886894549292, + "grad_norm": 14.824410438537598, + "learning_rate": 1.3328744564126868e-06, + "loss": 0.0517, + "num_input_tokens_seen": 8442736, + "step": 17155 + }, + { + "epoch": 2.264748581232678, + "grad_norm": 0.06626929342746735, + "learning_rate": 1.3324399960606835e-06, + "loss": 0.1567, + "num_input_tokens_seen": 8445424, + "step": 17160 + }, + { + "epoch": 2.2654084730104262, + "grad_norm": 0.00443276995792985, + "learning_rate": 1.3320054651506985e-06, + "loss": 0.0549, + "num_input_tokens_seen": 8448048, + "step": 17165 + }, + { + "epoch": 2.2660683647881745, + "grad_norm": 0.03811829909682274, + "learning_rate": 1.331570863774958e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8450288, + "step": 17170 + }, + { + "epoch": 2.2667282565659232, + "grad_norm": 0.09436263144016266, + "learning_rate": 1.3311361920257024e-06, + "loss": 0.0566, + "num_input_tokens_seen": 8452592, + "step": 17175 + }, + { + "epoch": 2.2673881483436715, + "grad_norm": 0.13137038052082062, + "learning_rate": 1.3307014499951882e-06, + "loss": 0.0006, + "num_input_tokens_seen": 8454960, + "step": 17180 + }, + { + "epoch": 2.2680480401214203, + "grad_norm": 0.032147493213415146, + "learning_rate": 1.3302666377756859e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8457328, + "step": 17185 + }, + { + "epoch": 2.2687079318991685, + "grad_norm": 0.01971900463104248, + "learning_rate": 1.3298317554594813e-06, + "loss": 0.083, + "num_input_tokens_seen": 8459824, + "step": 17190 + }, + { + "epoch": 2.269367823676917, + "grad_norm": 0.014136100187897682, + "learning_rate": 1.3293968031388752e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8462448, + "step": 17195 + }, + { + "epoch": 2.2700277154546655, + "grad_norm": 30.553123474121094, + "learning_rate": 1.3289617809061827e-06, + "loss": 0.1421, + "num_input_tokens_seen": 8464752, + "step": 17200 + }, + { + "epoch": 2.270687607232414, + "grad_norm": 23.864639282226562, + "learning_rate": 1.3285266888537346e-06, + "loss": 0.0642, + "num_input_tokens_seen": 8467184, + "step": 17205 + }, + { + "epoch": 2.2713474990101625, + "grad_norm": 0.978489100933075, + "learning_rate": 1.3280915270738754e-06, + "loss": 0.1786, + "num_input_tokens_seen": 8469680, + "step": 17210 + }, + { + "epoch": 2.272007390787911, + "grad_norm": 0.007936615496873856, + "learning_rate": 1.3276562956589656e-06, + "loss": 0.0017, + "num_input_tokens_seen": 8471920, + "step": 17215 + }, + { + "epoch": 2.272667282565659, + "grad_norm": 0.06761188060045242, + "learning_rate": 1.32722099470138e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8474608, + "step": 17220 + }, + { + "epoch": 2.273327174343408, + "grad_norm": 0.005021695047616959, + "learning_rate": 1.3267856242935076e-06, + "loss": 0.0253, + "num_input_tokens_seen": 8476848, + "step": 17225 + }, + { + "epoch": 2.273987066121156, + "grad_norm": 0.03890543803572655, + "learning_rate": 1.3263501845277528e-06, + "loss": 0.0448, + "num_input_tokens_seen": 8479280, + "step": 17230 + }, + { + "epoch": 2.274646957898905, + "grad_norm": 0.0053220209665596485, + "learning_rate": 1.3259146754965346e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8481776, + "step": 17235 + }, + { + "epoch": 2.275306849676653, + "grad_norm": 0.011229145340621471, + "learning_rate": 1.3254790972922867e-06, + "loss": 0.0031, + "num_input_tokens_seen": 8484208, + "step": 17240 + }, + { + "epoch": 2.2759667414544014, + "grad_norm": 0.003528717439621687, + "learning_rate": 1.3250434500074574e-06, + "loss": 0.047, + "num_input_tokens_seen": 8486832, + "step": 17245 + }, + { + "epoch": 2.27662663323215, + "grad_norm": 0.02159113623201847, + "learning_rate": 1.3246077337345097e-06, + "loss": 0.0611, + "num_input_tokens_seen": 8489328, + "step": 17250 + }, + { + "epoch": 2.2772865250098984, + "grad_norm": 0.005418762564659119, + "learning_rate": 1.3241719485659206e-06, + "loss": 0.0427, + "num_input_tokens_seen": 8491696, + "step": 17255 + }, + { + "epoch": 2.2779464167876466, + "grad_norm": 0.4703400135040283, + "learning_rate": 1.3237360945941834e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8494320, + "step": 17260 + }, + { + "epoch": 2.2786063085653954, + "grad_norm": 44.323387145996094, + "learning_rate": 1.3233001719118043e-06, + "loss": 0.0442, + "num_input_tokens_seen": 8496560, + "step": 17265 + }, + { + "epoch": 2.2792662003431436, + "grad_norm": 0.024648388847708702, + "learning_rate": 1.3228641806113047e-06, + "loss": 0.1099, + "num_input_tokens_seen": 8498928, + "step": 17270 + }, + { + "epoch": 2.2799260921208924, + "grad_norm": 0.035347118973731995, + "learning_rate": 1.3224281207852213e-06, + "loss": 0.0006, + "num_input_tokens_seen": 8501552, + "step": 17275 + }, + { + "epoch": 2.2805859838986406, + "grad_norm": 0.03767740726470947, + "learning_rate": 1.3219919925261034e-06, + "loss": 0.0854, + "num_input_tokens_seen": 8503792, + "step": 17280 + }, + { + "epoch": 2.281245875676389, + "grad_norm": 0.041668906807899475, + "learning_rate": 1.321555795926517e-06, + "loss": 0.0475, + "num_input_tokens_seen": 8505776, + "step": 17285 + }, + { + "epoch": 2.2819057674541376, + "grad_norm": 0.008051461540162563, + "learning_rate": 1.3211195310790415e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8508272, + "step": 17290 + }, + { + "epoch": 2.282565659231886, + "grad_norm": 0.004521454218775034, + "learning_rate": 1.3206831980762712e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8510768, + "step": 17295 + }, + { + "epoch": 2.283225551009634, + "grad_norm": 0.006502344273030758, + "learning_rate": 1.320246797010814e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8513520, + "step": 17300 + }, + { + "epoch": 2.283885442787383, + "grad_norm": 0.025791212916374207, + "learning_rate": 1.319810327975293e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8516080, + "step": 17305 + }, + { + "epoch": 2.284545334565131, + "grad_norm": 0.2241465449333191, + "learning_rate": 1.3193737910623462e-06, + "loss": 0.0675, + "num_input_tokens_seen": 8518448, + "step": 17310 + }, + { + "epoch": 2.28520522634288, + "grad_norm": 0.03452041745185852, + "learning_rate": 1.3189371863646246e-06, + "loss": 0.0, + "num_input_tokens_seen": 8520624, + "step": 17315 + }, + { + "epoch": 2.285865118120628, + "grad_norm": 73.1903305053711, + "learning_rate": 1.318500513974795e-06, + "loss": 0.1649, + "num_input_tokens_seen": 8523248, + "step": 17320 + }, + { + "epoch": 2.2865250098983765, + "grad_norm": 0.013924540020525455, + "learning_rate": 1.3180637739855376e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8525552, + "step": 17325 + }, + { + "epoch": 2.287184901676125, + "grad_norm": 0.006592525169253349, + "learning_rate": 1.3176269664895476e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8528048, + "step": 17330 + }, + { + "epoch": 2.2878447934538735, + "grad_norm": 0.464189738035202, + "learning_rate": 1.3171900915795338e-06, + "loss": 0.0557, + "num_input_tokens_seen": 8530480, + "step": 17335 + }, + { + "epoch": 2.288504685231622, + "grad_norm": 0.0561629980802536, + "learning_rate": 1.31675314934822e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8533040, + "step": 17340 + }, + { + "epoch": 2.2891645770093705, + "grad_norm": 16.04241371154785, + "learning_rate": 1.316316139888344e-06, + "loss": 0.1025, + "num_input_tokens_seen": 8535536, + "step": 17345 + }, + { + "epoch": 2.2898244687871188, + "grad_norm": 20.588781356811523, + "learning_rate": 1.3158790632926579e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8538032, + "step": 17350 + }, + { + "epoch": 2.2904843605648675, + "grad_norm": 0.01548085082322359, + "learning_rate": 1.3154419196539281e-06, + "loss": 0.0595, + "num_input_tokens_seen": 8540528, + "step": 17355 + }, + { + "epoch": 2.2911442523426158, + "grad_norm": 0.01596042700111866, + "learning_rate": 1.315004709064935e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8543024, + "step": 17360 + }, + { + "epoch": 2.2918041441203645, + "grad_norm": 0.006322584114968777, + "learning_rate": 1.3145674316184736e-06, + "loss": 0.0565, + "num_input_tokens_seen": 8545520, + "step": 17365 + }, + { + "epoch": 2.2924640358981128, + "grad_norm": 0.014929535798728466, + "learning_rate": 1.3141300874073524e-06, + "loss": 0.0457, + "num_input_tokens_seen": 8548016, + "step": 17370 + }, + { + "epoch": 2.293123927675861, + "grad_norm": 0.013482524082064629, + "learning_rate": 1.3136926765243955e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8550512, + "step": 17375 + }, + { + "epoch": 2.2937838194536098, + "grad_norm": 29.703046798706055, + "learning_rate": 1.3132551990624392e-06, + "loss": 0.055, + "num_input_tokens_seen": 8552816, + "step": 17380 + }, + { + "epoch": 2.294443711231358, + "grad_norm": 1.5670666694641113, + "learning_rate": 1.3128176551143352e-06, + "loss": 0.06, + "num_input_tokens_seen": 8555312, + "step": 17385 + }, + { + "epoch": 2.2951036030091063, + "grad_norm": 24.488922119140625, + "learning_rate": 1.3123800447729497e-06, + "loss": 0.0493, + "num_input_tokens_seen": 8557552, + "step": 17390 + }, + { + "epoch": 2.295763494786855, + "grad_norm": 0.008273608982563019, + "learning_rate": 1.3119423681311612e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8559920, + "step": 17395 + }, + { + "epoch": 2.2964233865646033, + "grad_norm": 0.031402088701725006, + "learning_rate": 1.3115046252818644e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8562544, + "step": 17400 + }, + { + "epoch": 2.297083278342352, + "grad_norm": 0.015473921783268452, + "learning_rate": 1.3110668163179664e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8565168, + "step": 17405 + }, + { + "epoch": 2.2977431701201003, + "grad_norm": 0.061862409114837646, + "learning_rate": 1.3106289413323891e-06, + "loss": 0.0382, + "num_input_tokens_seen": 8567664, + "step": 17410 + }, + { + "epoch": 2.2984030618978486, + "grad_norm": 0.0838281512260437, + "learning_rate": 1.3101910004180685e-06, + "loss": 0.006, + "num_input_tokens_seen": 8569776, + "step": 17415 + }, + { + "epoch": 2.2990629536755973, + "grad_norm": 0.003964480943977833, + "learning_rate": 1.3097529936679545e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8571952, + "step": 17420 + }, + { + "epoch": 2.2997228454533456, + "grad_norm": 0.006393681280314922, + "learning_rate": 1.3093149211750105e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8574384, + "step": 17425 + }, + { + "epoch": 2.300382737231094, + "grad_norm": 0.003106580814346671, + "learning_rate": 1.3088767830322142e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8576816, + "step": 17430 + }, + { + "epoch": 2.3010426290088426, + "grad_norm": 0.001471186289563775, + "learning_rate": 1.3084385793325575e-06, + "loss": 0.0389, + "num_input_tokens_seen": 8579184, + "step": 17435 + }, + { + "epoch": 2.301702520786591, + "grad_norm": 0.001006085192784667, + "learning_rate": 1.308000310169046e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8581616, + "step": 17440 + }, + { + "epoch": 2.3023624125643396, + "grad_norm": 0.0674763098359108, + "learning_rate": 1.307561975634699e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8584048, + "step": 17445 + }, + { + "epoch": 2.303022304342088, + "grad_norm": 0.01596246100962162, + "learning_rate": 1.3071235758225497e-06, + "loss": 0.0, + "num_input_tokens_seen": 8586288, + "step": 17450 + }, + { + "epoch": 2.303682196119836, + "grad_norm": 0.05637786537408829, + "learning_rate": 1.3066851108256457e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8588784, + "step": 17455 + }, + { + "epoch": 2.304342087897585, + "grad_norm": 0.008062033914029598, + "learning_rate": 1.3062465807370475e-06, + "loss": 0.0577, + "num_input_tokens_seen": 8591216, + "step": 17460 + }, + { + "epoch": 2.305001979675333, + "grad_norm": 0.020235441625118256, + "learning_rate": 1.3058079856498302e-06, + "loss": 0.1142, + "num_input_tokens_seen": 8593904, + "step": 17465 + }, + { + "epoch": 2.305661871453082, + "grad_norm": 0.012712682597339153, + "learning_rate": 1.3053693256570829e-06, + "loss": 0.0, + "num_input_tokens_seen": 8596208, + "step": 17470 + }, + { + "epoch": 2.30632176323083, + "grad_norm": 0.21507523953914642, + "learning_rate": 1.304930600851907e-06, + "loss": 0.085, + "num_input_tokens_seen": 8598768, + "step": 17475 + }, + { + "epoch": 2.3069816550085784, + "grad_norm": 0.2181164175271988, + "learning_rate": 1.3044918113274195e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8601008, + "step": 17480 + }, + { + "epoch": 2.307641546786327, + "grad_norm": 0.004835736472159624, + "learning_rate": 1.3040529571767498e-06, + "loss": 0.127, + "num_input_tokens_seen": 8603632, + "step": 17485 + }, + { + "epoch": 2.3083014385640754, + "grad_norm": 0.42590686678886414, + "learning_rate": 1.3036140384930416e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8605872, + "step": 17490 + }, + { + "epoch": 2.308961330341824, + "grad_norm": 0.06867695599794388, + "learning_rate": 1.3031750553694528e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8608432, + "step": 17495 + }, + { + "epoch": 2.3096212221195724, + "grad_norm": 0.010033776052296162, + "learning_rate": 1.3027360078991535e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8610736, + "step": 17500 + }, + { + "epoch": 2.3102811138973207, + "grad_norm": 0.037731487303972244, + "learning_rate": 1.302296896175329e-06, + "loss": 0.0782, + "num_input_tokens_seen": 8613616, + "step": 17505 + }, + { + "epoch": 2.3109410056750694, + "grad_norm": 0.0016087280819192529, + "learning_rate": 1.3018577202911774e-06, + "loss": 0.0256, + "num_input_tokens_seen": 8616048, + "step": 17510 + }, + { + "epoch": 2.3116008974528177, + "grad_norm": 21.856361389160156, + "learning_rate": 1.3014184803399104e-06, + "loss": 0.1737, + "num_input_tokens_seen": 8618224, + "step": 17515 + }, + { + "epoch": 2.312260789230566, + "grad_norm": 0.1102667823433876, + "learning_rate": 1.3009791764147537e-06, + "loss": 0.0467, + "num_input_tokens_seen": 8620784, + "step": 17520 + }, + { + "epoch": 2.3129206810083147, + "grad_norm": 0.007832813076674938, + "learning_rate": 1.3005398086089462e-06, + "loss": 0.0, + "num_input_tokens_seen": 8623152, + "step": 17525 + }, + { + "epoch": 2.313580572786063, + "grad_norm": 0.10257086157798767, + "learning_rate": 1.3001003770157409e-06, + "loss": 0.034, + "num_input_tokens_seen": 8625456, + "step": 17530 + }, + { + "epoch": 2.3142404645638117, + "grad_norm": 0.22831708192825317, + "learning_rate": 1.2996608817284033e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8627952, + "step": 17535 + }, + { + "epoch": 2.31490035634156, + "grad_norm": 0.02536954917013645, + "learning_rate": 1.2992213228402142e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8630640, + "step": 17540 + }, + { + "epoch": 2.3155602481193083, + "grad_norm": 0.013890203088521957, + "learning_rate": 1.2987817004444654e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8633520, + "step": 17545 + }, + { + "epoch": 2.316220139897057, + "grad_norm": 0.0018548115622252226, + "learning_rate": 1.2983420146344648e-06, + "loss": 0.0799, + "num_input_tokens_seen": 8636208, + "step": 17550 + }, + { + "epoch": 2.3168800316748053, + "grad_norm": 0.05500582605600357, + "learning_rate": 1.297902265503532e-06, + "loss": 0.0427, + "num_input_tokens_seen": 8638512, + "step": 17555 + }, + { + "epoch": 2.3175399234525536, + "grad_norm": 0.0030148825608193874, + "learning_rate": 1.2974624531450003e-06, + "loss": 0.1341, + "num_input_tokens_seen": 8640944, + "step": 17560 + }, + { + "epoch": 2.3181998152303023, + "grad_norm": 0.014523412100970745, + "learning_rate": 1.2970225776522172e-06, + "loss": 0.1493, + "num_input_tokens_seen": 8643632, + "step": 17565 + }, + { + "epoch": 2.3188597070080506, + "grad_norm": 0.004904015921056271, + "learning_rate": 1.2965826391185425e-06, + "loss": 0.018, + "num_input_tokens_seen": 8646064, + "step": 17570 + }, + { + "epoch": 2.3195195987857993, + "grad_norm": 0.07339364290237427, + "learning_rate": 1.2961426376373507e-06, + "loss": 0.0023, + "num_input_tokens_seen": 8648560, + "step": 17575 + }, + { + "epoch": 2.3201794905635476, + "grad_norm": 0.0362345390021801, + "learning_rate": 1.2957025733020285e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8651056, + "step": 17580 + }, + { + "epoch": 2.320839382341296, + "grad_norm": 0.013479121029376984, + "learning_rate": 1.2952624462059767e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8653552, + "step": 17585 + }, + { + "epoch": 2.3214992741190446, + "grad_norm": 0.06915712356567383, + "learning_rate": 1.2948222564426083e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8656048, + "step": 17590 + }, + { + "epoch": 2.322159165896793, + "grad_norm": 0.023110028356313705, + "learning_rate": 1.2943820041053512e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8658352, + "step": 17595 + }, + { + "epoch": 2.3228190576745416, + "grad_norm": 0.01199672743678093, + "learning_rate": 1.2939416892876451e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8660720, + "step": 17600 + }, + { + "epoch": 2.32347894945229, + "grad_norm": 0.0005031914915889502, + "learning_rate": 1.2935013120829443e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8663024, + "step": 17605 + }, + { + "epoch": 2.324138841230038, + "grad_norm": 16.33403205871582, + "learning_rate": 1.2930608725847156e-06, + "loss": 0.0411, + "num_input_tokens_seen": 8665392, + "step": 17610 + }, + { + "epoch": 2.324798733007787, + "grad_norm": 0.02279752679169178, + "learning_rate": 1.2926203708864385e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8667824, + "step": 17615 + }, + { + "epoch": 2.325458624785535, + "grad_norm": 27.623626708984375, + "learning_rate": 1.2921798070816068e-06, + "loss": 0.1861, + "num_input_tokens_seen": 8670448, + "step": 17620 + }, + { + "epoch": 2.326118516563284, + "grad_norm": 0.006070361007004976, + "learning_rate": 1.2917391812637269e-06, + "loss": 0.0, + "num_input_tokens_seen": 8672944, + "step": 17625 + }, + { + "epoch": 2.326778408341032, + "grad_norm": 0.35379403829574585, + "learning_rate": 1.2912984935263183e-06, + "loss": 0.0659, + "num_input_tokens_seen": 8675248, + "step": 17630 + }, + { + "epoch": 2.3274383001187804, + "grad_norm": 0.1110011488199234, + "learning_rate": 1.290857743962914e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8677680, + "step": 17635 + }, + { + "epoch": 2.328098191896529, + "grad_norm": 0.007278754375874996, + "learning_rate": 1.2904169326670596e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8680048, + "step": 17640 + }, + { + "epoch": 2.3287580836742774, + "grad_norm": 0.11822202056646347, + "learning_rate": 1.2899760597323144e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8682224, + "step": 17645 + }, + { + "epoch": 2.329417975452026, + "grad_norm": 0.015329859219491482, + "learning_rate": 1.2895351252522502e-06, + "loss": 0.0956, + "num_input_tokens_seen": 8684784, + "step": 17650 + }, + { + "epoch": 2.3300778672297744, + "grad_norm": 0.6385002136230469, + "learning_rate": 1.2890941293204525e-06, + "loss": 0.0431, + "num_input_tokens_seen": 8687088, + "step": 17655 + }, + { + "epoch": 2.3307377590075227, + "grad_norm": 0.0072060092352330685, + "learning_rate": 1.2886530720305193e-06, + "loss": 0.0472, + "num_input_tokens_seen": 8689264, + "step": 17660 + }, + { + "epoch": 2.3313976507852714, + "grad_norm": 0.007306993473321199, + "learning_rate": 1.2882119534760618e-06, + "loss": 0.147, + "num_input_tokens_seen": 8691760, + "step": 17665 + }, + { + "epoch": 2.3320575425630197, + "grad_norm": 0.555959939956665, + "learning_rate": 1.2877707737507043e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8694128, + "step": 17670 + }, + { + "epoch": 2.332717434340768, + "grad_norm": 0.020105179399251938, + "learning_rate": 1.2873295329480837e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8696688, + "step": 17675 + }, + { + "epoch": 2.3333773261185167, + "grad_norm": 0.019031627103686333, + "learning_rate": 1.2868882311618505e-06, + "loss": 0.1152, + "num_input_tokens_seen": 8699120, + "step": 17680 + }, + { + "epoch": 2.334037217896265, + "grad_norm": 0.3021307587623596, + "learning_rate": 1.286446868485668e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8701552, + "step": 17685 + }, + { + "epoch": 2.3346971096740132, + "grad_norm": 0.04043988510966301, + "learning_rate": 1.2860054450132116e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8704048, + "step": 17690 + }, + { + "epoch": 2.335357001451762, + "grad_norm": 2.304701089859009, + "learning_rate": 1.2855639608381706e-06, + "loss": 0.0014, + "num_input_tokens_seen": 8706480, + "step": 17695 + }, + { + "epoch": 2.3360168932295102, + "grad_norm": 0.0803212970495224, + "learning_rate": 1.2851224160542472e-06, + "loss": 0.0017, + "num_input_tokens_seen": 8709040, + "step": 17700 + }, + { + "epoch": 2.336676785007259, + "grad_norm": 0.03722332417964935, + "learning_rate": 1.2846808107551553e-06, + "loss": 0.0613, + "num_input_tokens_seen": 8711472, + "step": 17705 + }, + { + "epoch": 2.3373366767850072, + "grad_norm": 0.012616438791155815, + "learning_rate": 1.2842391450346228e-06, + "loss": 0.0097, + "num_input_tokens_seen": 8713904, + "step": 17710 + }, + { + "epoch": 2.3379965685627555, + "grad_norm": 49.51243591308594, + "learning_rate": 1.2837974189863902e-06, + "loss": 0.1691, + "num_input_tokens_seen": 8716144, + "step": 17715 + }, + { + "epoch": 2.3386564603405042, + "grad_norm": 0.007718805689364672, + "learning_rate": 1.2833556327042105e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8718448, + "step": 17720 + }, + { + "epoch": 2.3393163521182525, + "grad_norm": 24.115741729736328, + "learning_rate": 1.2829137862818496e-06, + "loss": 0.114, + "num_input_tokens_seen": 8720624, + "step": 17725 + }, + { + "epoch": 2.3399762438960012, + "grad_norm": 0.0347772054374218, + "learning_rate": 1.2824718798130862e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8723312, + "step": 17730 + }, + { + "epoch": 2.3406361356737495, + "grad_norm": 0.018872858956456184, + "learning_rate": 1.2820299133917122e-06, + "loss": 0.1246, + "num_input_tokens_seen": 8725680, + "step": 17735 + }, + { + "epoch": 2.341296027451498, + "grad_norm": 0.022448837757110596, + "learning_rate": 1.281587887111531e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8727984, + "step": 17740 + }, + { + "epoch": 2.3419559192292465, + "grad_norm": 0.041912712156772614, + "learning_rate": 1.28114580106636e-06, + "loss": 0.0371, + "num_input_tokens_seen": 8730416, + "step": 17745 + }, + { + "epoch": 2.342615811006995, + "grad_norm": 0.07139486819505692, + "learning_rate": 1.2807036553500286e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8733104, + "step": 17750 + }, + { + "epoch": 2.3432757027847435, + "grad_norm": 0.19774889945983887, + "learning_rate": 1.280261450056379e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8735600, + "step": 17755 + }, + { + "epoch": 2.343935594562492, + "grad_norm": 0.012960204854607582, + "learning_rate": 1.2798191852792662e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8738032, + "step": 17760 + }, + { + "epoch": 2.34459548634024, + "grad_norm": 0.004742769058793783, + "learning_rate": 1.2793768611125576e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8740464, + "step": 17765 + }, + { + "epoch": 2.345255378117989, + "grad_norm": 0.02533571422100067, + "learning_rate": 1.2789344776501333e-06, + "loss": 0.0731, + "num_input_tokens_seen": 8742960, + "step": 17770 + }, + { + "epoch": 2.345915269895737, + "grad_norm": 0.02396445721387863, + "learning_rate": 1.2784920349858858e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8745648, + "step": 17775 + }, + { + "epoch": 2.346575161673486, + "grad_norm": 0.003967063035815954, + "learning_rate": 1.278049533213721e-06, + "loss": 0.0027, + "num_input_tokens_seen": 8748272, + "step": 17780 + }, + { + "epoch": 2.347235053451234, + "grad_norm": 0.05136928707361221, + "learning_rate": 1.2776069724275557e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8750832, + "step": 17785 + }, + { + "epoch": 2.3478949452289823, + "grad_norm": 21.935380935668945, + "learning_rate": 1.277164352721321e-06, + "loss": 0.1713, + "num_input_tokens_seen": 8753200, + "step": 17790 + }, + { + "epoch": 2.348554837006731, + "grad_norm": 0.004382560960948467, + "learning_rate": 1.27672167418896e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8755824, + "step": 17795 + }, + { + "epoch": 2.3492147287844793, + "grad_norm": 29.610349655151367, + "learning_rate": 1.276278936924427e-06, + "loss": 0.0133, + "num_input_tokens_seen": 8758128, + "step": 17800 + }, + { + "epoch": 2.3498746205622276, + "grad_norm": 0.5538921356201172, + "learning_rate": 1.2758361410216902e-06, + "loss": 0.0009, + "num_input_tokens_seen": 8760624, + "step": 17805 + }, + { + "epoch": 2.3505345123399763, + "grad_norm": 0.014471026137471199, + "learning_rate": 1.2753932865747302e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8762864, + "step": 17810 + }, + { + "epoch": 2.3511944041177246, + "grad_norm": 53.138797760009766, + "learning_rate": 1.2749503736775395e-06, + "loss": 0.0598, + "num_input_tokens_seen": 8765424, + "step": 17815 + }, + { + "epoch": 2.351854295895473, + "grad_norm": 0.002378394827246666, + "learning_rate": 1.2745074024241227e-06, + "loss": 0.0, + "num_input_tokens_seen": 8768048, + "step": 17820 + }, + { + "epoch": 2.3525141876732216, + "grad_norm": 0.09277091920375824, + "learning_rate": 1.2740643729084974e-06, + "loss": 0.0296, + "num_input_tokens_seen": 8770672, + "step": 17825 + }, + { + "epoch": 2.35317407945097, + "grad_norm": 92.40314483642578, + "learning_rate": 1.273621285224694e-06, + "loss": 0.0406, + "num_input_tokens_seen": 8773424, + "step": 17830 + }, + { + "epoch": 2.3538339712287186, + "grad_norm": 0.003913685213774443, + "learning_rate": 1.2731781394667538e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8775792, + "step": 17835 + }, + { + "epoch": 2.354493863006467, + "grad_norm": 0.012034501880407333, + "learning_rate": 1.2727349357287322e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8778288, + "step": 17840 + }, + { + "epoch": 2.355153754784215, + "grad_norm": 0.0050900098867714405, + "learning_rate": 1.2722916741046951e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8780848, + "step": 17845 + }, + { + "epoch": 2.355813646561964, + "grad_norm": 0.005646945908665657, + "learning_rate": 1.2718483546887222e-06, + "loss": 0.0007, + "num_input_tokens_seen": 8783344, + "step": 17850 + }, + { + "epoch": 2.356473538339712, + "grad_norm": 0.021207528188824654, + "learning_rate": 1.2714049775749043e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8785776, + "step": 17855 + }, + { + "epoch": 2.357133430117461, + "grad_norm": 0.0018820096738636494, + "learning_rate": 1.2709615428573454e-06, + "loss": 0.1, + "num_input_tokens_seen": 8787952, + "step": 17860 + }, + { + "epoch": 2.357793321895209, + "grad_norm": 0.023304801434278488, + "learning_rate": 1.2705180506301614e-06, + "loss": 0.2573, + "num_input_tokens_seen": 8790512, + "step": 17865 + }, + { + "epoch": 2.3584532136729575, + "grad_norm": 0.012742428109049797, + "learning_rate": 1.2700745009874799e-06, + "loss": 0.0, + "num_input_tokens_seen": 8792816, + "step": 17870 + }, + { + "epoch": 2.359113105450706, + "grad_norm": 38.9813117980957, + "learning_rate": 1.2696308940234414e-06, + "loss": 0.0752, + "num_input_tokens_seen": 8795184, + "step": 17875 + }, + { + "epoch": 2.3597729972284545, + "grad_norm": 0.10750970989465714, + "learning_rate": 1.2691872298321978e-06, + "loss": 0.1042, + "num_input_tokens_seen": 8797808, + "step": 17880 + }, + { + "epoch": 2.360432889006203, + "grad_norm": 0.09598078578710556, + "learning_rate": 1.2687435085079143e-06, + "loss": 0.0013, + "num_input_tokens_seen": 8800368, + "step": 17885 + }, + { + "epoch": 2.3610927807839515, + "grad_norm": 0.028722476214170456, + "learning_rate": 1.2682997301447671e-06, + "loss": 0.2316, + "num_input_tokens_seen": 8802992, + "step": 17890 + }, + { + "epoch": 2.3617526725616997, + "grad_norm": 0.002694531111046672, + "learning_rate": 1.267855894836945e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8805744, + "step": 17895 + }, + { + "epoch": 2.3624125643394485, + "grad_norm": 0.006382739171385765, + "learning_rate": 1.267412002678649e-06, + "loss": 0.0021, + "num_input_tokens_seen": 8807984, + "step": 17900 + }, + { + "epoch": 2.3630724561171967, + "grad_norm": 0.1750914603471756, + "learning_rate": 1.2669680537640916e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8810480, + "step": 17905 + }, + { + "epoch": 2.3637323478949455, + "grad_norm": 0.13401751220226288, + "learning_rate": 1.2665240481874986e-06, + "loss": 0.001, + "num_input_tokens_seen": 8812784, + "step": 17910 + }, + { + "epoch": 2.3643922396726937, + "grad_norm": 0.0007668877951800823, + "learning_rate": 1.266079986043106e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8815344, + "step": 17915 + }, + { + "epoch": 2.365052131450442, + "grad_norm": 0.0035453704185783863, + "learning_rate": 1.2656358674251633e-06, + "loss": 0.1032, + "num_input_tokens_seen": 8817776, + "step": 17920 + }, + { + "epoch": 2.3657120232281907, + "grad_norm": 0.010339041240513325, + "learning_rate": 1.2651916924279311e-06, + "loss": 0.0633, + "num_input_tokens_seen": 8820464, + "step": 17925 + }, + { + "epoch": 2.366371915005939, + "grad_norm": 0.012196065858006477, + "learning_rate": 1.2647474611456827e-06, + "loss": 0.1189, + "num_input_tokens_seen": 8823280, + "step": 17930 + }, + { + "epoch": 2.3670318067836873, + "grad_norm": 160.40484619140625, + "learning_rate": 1.2643031736727029e-06, + "loss": 0.127, + "num_input_tokens_seen": 8825776, + "step": 17935 + }, + { + "epoch": 2.367691698561436, + "grad_norm": 0.037526555359363556, + "learning_rate": 1.2638588301032883e-06, + "loss": 0.1263, + "num_input_tokens_seen": 8828016, + "step": 17940 + }, + { + "epoch": 2.3683515903391843, + "grad_norm": 20.34902572631836, + "learning_rate": 1.2634144305317479e-06, + "loss": 0.0705, + "num_input_tokens_seen": 8830192, + "step": 17945 + }, + { + "epoch": 2.3690114821169326, + "grad_norm": 15.509211540222168, + "learning_rate": 1.2629699750524017e-06, + "loss": 0.0828, + "num_input_tokens_seen": 8832624, + "step": 17950 + }, + { + "epoch": 2.3696713738946813, + "grad_norm": 0.048276953399181366, + "learning_rate": 1.2625254637595829e-06, + "loss": 0.0549, + "num_input_tokens_seen": 8835248, + "step": 17955 + }, + { + "epoch": 2.3703312656724296, + "grad_norm": 0.18577997386455536, + "learning_rate": 1.2620808967476352e-06, + "loss": 0.0008, + "num_input_tokens_seen": 8838000, + "step": 17960 + }, + { + "epoch": 2.3709911574501783, + "grad_norm": 0.020469972863793373, + "learning_rate": 1.2616362741109154e-06, + "loss": 0.0022, + "num_input_tokens_seen": 8840688, + "step": 17965 + }, + { + "epoch": 2.3716510492279266, + "grad_norm": 0.005565830506384373, + "learning_rate": 1.2611915959437908e-06, + "loss": 0.1106, + "num_input_tokens_seen": 8843120, + "step": 17970 + }, + { + "epoch": 2.372310941005675, + "grad_norm": 0.043362200260162354, + "learning_rate": 1.2607468623406415e-06, + "loss": 0.0799, + "num_input_tokens_seen": 8845616, + "step": 17975 + }, + { + "epoch": 2.3729708327834236, + "grad_norm": 14.814071655273438, + "learning_rate": 1.2603020733958588e-06, + "loss": 0.002, + "num_input_tokens_seen": 8847856, + "step": 17980 + }, + { + "epoch": 2.373630724561172, + "grad_norm": 0.011253765784204006, + "learning_rate": 1.2598572292038459e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8850480, + "step": 17985 + }, + { + "epoch": 2.3742906163389206, + "grad_norm": 0.09818299859762192, + "learning_rate": 1.2594123298590177e-06, + "loss": 0.0006, + "num_input_tokens_seen": 8853232, + "step": 17990 + }, + { + "epoch": 2.374950508116669, + "grad_norm": 0.07499930262565613, + "learning_rate": 1.2589673754558014e-06, + "loss": 0.0675, + "num_input_tokens_seen": 8855664, + "step": 17995 + }, + { + "epoch": 2.375610399894417, + "grad_norm": 0.005919842980802059, + "learning_rate": 1.2585223660886347e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8858160, + "step": 18000 + }, + { + "epoch": 2.376270291672166, + "grad_norm": 21.43160057067871, + "learning_rate": 1.258077301851968e-06, + "loss": 0.0011, + "num_input_tokens_seen": 8860464, + "step": 18005 + }, + { + "epoch": 2.376930183449914, + "grad_norm": 0.008869780227541924, + "learning_rate": 1.2576321828402627e-06, + "loss": 0.0613, + "num_input_tokens_seen": 8862896, + "step": 18010 + }, + { + "epoch": 2.377590075227663, + "grad_norm": 0.04058117792010307, + "learning_rate": 1.2571870091479921e-06, + "loss": 0.0488, + "num_input_tokens_seen": 8865264, + "step": 18015 + }, + { + "epoch": 2.378249967005411, + "grad_norm": 0.13646909594535828, + "learning_rate": 1.2567417808696416e-06, + "loss": 0.0703, + "num_input_tokens_seen": 8867760, + "step": 18020 + }, + { + "epoch": 2.3789098587831594, + "grad_norm": 0.10359276086091995, + "learning_rate": 1.2562964980997072e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8870448, + "step": 18025 + }, + { + "epoch": 2.379569750560908, + "grad_norm": 0.049668870866298676, + "learning_rate": 1.2558511609326968e-06, + "loss": 0.046, + "num_input_tokens_seen": 8873136, + "step": 18030 + }, + { + "epoch": 2.3802296423386564, + "grad_norm": 0.005334274843335152, + "learning_rate": 1.2554057694631302e-06, + "loss": 0.0041, + "num_input_tokens_seen": 8875632, + "step": 18035 + }, + { + "epoch": 2.380889534116405, + "grad_norm": 0.021098989993333817, + "learning_rate": 1.2549603237855386e-06, + "loss": 0.0006, + "num_input_tokens_seen": 8878384, + "step": 18040 + }, + { + "epoch": 2.3815494258941534, + "grad_norm": 0.12568983435630798, + "learning_rate": 1.2545148239944644e-06, + "loss": 0.0615, + "num_input_tokens_seen": 8880944, + "step": 18045 + }, + { + "epoch": 2.3822093176719017, + "grad_norm": 0.002665475942194462, + "learning_rate": 1.2540692701844625e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8883568, + "step": 18050 + }, + { + "epoch": 2.3828692094496504, + "grad_norm": 0.14754042029380798, + "learning_rate": 1.253623662450097e-06, + "loss": 0.052, + "num_input_tokens_seen": 8886064, + "step": 18055 + }, + { + "epoch": 2.3835291012273987, + "grad_norm": 0.2536415755748749, + "learning_rate": 1.2531780008859464e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8888816, + "step": 18060 + }, + { + "epoch": 2.384188993005147, + "grad_norm": 0.5732733607292175, + "learning_rate": 1.252732285586598e-06, + "loss": 0.0738, + "num_input_tokens_seen": 8891248, + "step": 18065 + }, + { + "epoch": 2.3848488847828957, + "grad_norm": 0.2951080799102783, + "learning_rate": 1.2522865166466528e-06, + "loss": 0.0487, + "num_input_tokens_seen": 8893808, + "step": 18070 + }, + { + "epoch": 2.385508776560644, + "grad_norm": 27.109596252441406, + "learning_rate": 1.2518406941607207e-06, + "loss": 0.0383, + "num_input_tokens_seen": 8896304, + "step": 18075 + }, + { + "epoch": 2.3861686683383927, + "grad_norm": 0.049254752695560455, + "learning_rate": 1.2513948182234253e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8898672, + "step": 18080 + }, + { + "epoch": 2.386828560116141, + "grad_norm": 0.0034236342180520296, + "learning_rate": 1.2509488889293998e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8901168, + "step": 18085 + }, + { + "epoch": 2.3874884518938893, + "grad_norm": 0.6597353219985962, + "learning_rate": 1.2505029063732898e-06, + "loss": 0.0005, + "num_input_tokens_seen": 8903600, + "step": 18090 + }, + { + "epoch": 2.388148343671638, + "grad_norm": 0.019330337643623352, + "learning_rate": 1.2500568706497526e-06, + "loss": 0.0954, + "num_input_tokens_seen": 8906032, + "step": 18095 + }, + { + "epoch": 2.3888082354493863, + "grad_norm": 21.249752044677734, + "learning_rate": 1.2496107818534548e-06, + "loss": 0.0035, + "num_input_tokens_seen": 8908400, + "step": 18100 + }, + { + "epoch": 2.3894681272271345, + "grad_norm": 0.006237414199858904, + "learning_rate": 1.2491646400790766e-06, + "loss": 0.0573, + "num_input_tokens_seen": 8910832, + "step": 18105 + }, + { + "epoch": 2.3901280190048833, + "grad_norm": 0.045193735510110855, + "learning_rate": 1.2487184454213073e-06, + "loss": 0.0613, + "num_input_tokens_seen": 8913200, + "step": 18110 + }, + { + "epoch": 2.3907879107826315, + "grad_norm": 0.06255914270877838, + "learning_rate": 1.2482721979748494e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8915568, + "step": 18115 + }, + { + "epoch": 2.3914478025603803, + "grad_norm": 0.07931576669216156, + "learning_rate": 1.2478258978344149e-06, + "loss": 0.0691, + "num_input_tokens_seen": 8917680, + "step": 18120 + }, + { + "epoch": 2.3921076943381285, + "grad_norm": 0.2358531802892685, + "learning_rate": 1.2473795450947287e-06, + "loss": 0.0109, + "num_input_tokens_seen": 8920112, + "step": 18125 + }, + { + "epoch": 2.392767586115877, + "grad_norm": 0.04463690146803856, + "learning_rate": 1.2469331398505254e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8922544, + "step": 18130 + }, + { + "epoch": 2.3934274778936255, + "grad_norm": 0.07052083313465118, + "learning_rate": 1.246486682196551e-06, + "loss": 0.094, + "num_input_tokens_seen": 8925040, + "step": 18135 + }, + { + "epoch": 2.394087369671374, + "grad_norm": 0.026072848588228226, + "learning_rate": 1.2460401722275633e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8927408, + "step": 18140 + }, + { + "epoch": 2.3947472614491225, + "grad_norm": 0.030193530023097992, + "learning_rate": 1.2455936100383309e-06, + "loss": 0.1117, + "num_input_tokens_seen": 8929840, + "step": 18145 + }, + { + "epoch": 2.395407153226871, + "grad_norm": 0.018104439601302147, + "learning_rate": 1.2451469957236334e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8932272, + "step": 18150 + }, + { + "epoch": 2.396067045004619, + "grad_norm": 38.890846252441406, + "learning_rate": 1.2447003293782607e-06, + "loss": 0.0591, + "num_input_tokens_seen": 8934640, + "step": 18155 + }, + { + "epoch": 2.396726936782368, + "grad_norm": 0.17797276377677917, + "learning_rate": 1.2442536110970152e-06, + "loss": 0.0025, + "num_input_tokens_seen": 8937200, + "step": 18160 + }, + { + "epoch": 2.397386828560116, + "grad_norm": 0.001440043211914599, + "learning_rate": 1.2438068409747097e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8939568, + "step": 18165 + }, + { + "epoch": 2.398046720337865, + "grad_norm": 0.002936235163360834, + "learning_rate": 1.2433600191061677e-06, + "loss": 0.0413, + "num_input_tokens_seen": 8942000, + "step": 18170 + }, + { + "epoch": 2.398706612115613, + "grad_norm": 0.006453686859458685, + "learning_rate": 1.242913145586224e-06, + "loss": 0.0088, + "num_input_tokens_seen": 8944688, + "step": 18175 + }, + { + "epoch": 2.3993665038933614, + "grad_norm": 83.48731231689453, + "learning_rate": 1.2424662205097241e-06, + "loss": 0.0345, + "num_input_tokens_seen": 8947312, + "step": 18180 + }, + { + "epoch": 2.40002639567111, + "grad_norm": 0.07596401125192642, + "learning_rate": 1.2420192439715247e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8949808, + "step": 18185 + }, + { + "epoch": 2.4006862874488584, + "grad_norm": 0.005037888418883085, + "learning_rate": 1.2415722160664933e-06, + "loss": 0.0003, + "num_input_tokens_seen": 8952112, + "step": 18190 + }, + { + "epoch": 2.4013461792266066, + "grad_norm": 0.09627171605825424, + "learning_rate": 1.2411251368895085e-06, + "loss": 0.1239, + "num_input_tokens_seen": 8954800, + "step": 18195 + }, + { + "epoch": 2.4020060710043554, + "grad_norm": 0.0015911199152469635, + "learning_rate": 1.2406780065354592e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8957360, + "step": 18200 + }, + { + "epoch": 2.4026659627821036, + "grad_norm": 0.015304110012948513, + "learning_rate": 1.240230825099246e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8960112, + "step": 18205 + }, + { + "epoch": 2.4033258545598524, + "grad_norm": 0.9241950511932373, + "learning_rate": 1.2397835926757798e-06, + "loss": 0.0006, + "num_input_tokens_seen": 8962608, + "step": 18210 + }, + { + "epoch": 2.4039857463376006, + "grad_norm": 66.03406524658203, + "learning_rate": 1.2393363093599823e-06, + "loss": 0.133, + "num_input_tokens_seen": 8965040, + "step": 18215 + }, + { + "epoch": 2.404645638115349, + "grad_norm": 0.0014983558794483542, + "learning_rate": 1.2388889752467867e-06, + "loss": 0.0659, + "num_input_tokens_seen": 8967216, + "step": 18220 + }, + { + "epoch": 2.4053055298930976, + "grad_norm": 0.010034170933067799, + "learning_rate": 1.2384415904311357e-06, + "loss": 0.0, + "num_input_tokens_seen": 8969712, + "step": 18225 + }, + { + "epoch": 2.405965421670846, + "grad_norm": 0.17632248997688293, + "learning_rate": 1.2379941550079836e-06, + "loss": 0.0001, + "num_input_tokens_seen": 8972208, + "step": 18230 + }, + { + "epoch": 2.406625313448594, + "grad_norm": 27.587594985961914, + "learning_rate": 1.2375466690722957e-06, + "loss": 0.105, + "num_input_tokens_seen": 8974768, + "step": 18235 + }, + { + "epoch": 2.407285205226343, + "grad_norm": 0.012240786105394363, + "learning_rate": 1.2370991327190473e-06, + "loss": 0.0736, + "num_input_tokens_seen": 8977200, + "step": 18240 + }, + { + "epoch": 2.407945097004091, + "grad_norm": 0.0025328362826257944, + "learning_rate": 1.2366515460432255e-06, + "loss": 0.0, + "num_input_tokens_seen": 8979568, + "step": 18245 + }, + { + "epoch": 2.40860498878184, + "grad_norm": 0.08074627071619034, + "learning_rate": 1.2362039091398259e-06, + "loss": 0.0907, + "num_input_tokens_seen": 8981936, + "step": 18250 + }, + { + "epoch": 2.409264880559588, + "grad_norm": 0.031857311725616455, + "learning_rate": 1.235756222103858e-06, + "loss": 0.1141, + "num_input_tokens_seen": 8984368, + "step": 18255 + }, + { + "epoch": 2.4099247723373365, + "grad_norm": 17.715085983276367, + "learning_rate": 1.2353084850303386e-06, + "loss": 0.1377, + "num_input_tokens_seen": 8986736, + "step": 18260 + }, + { + "epoch": 2.410584664115085, + "grad_norm": 59.32007598876953, + "learning_rate": 1.2348606980142973e-06, + "loss": 0.1191, + "num_input_tokens_seen": 8988720, + "step": 18265 + }, + { + "epoch": 2.4112445558928335, + "grad_norm": 0.045384231954813004, + "learning_rate": 1.2344128611507733e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8990960, + "step": 18270 + }, + { + "epoch": 2.411904447670582, + "grad_norm": 0.16882988810539246, + "learning_rate": 1.2339649745348176e-06, + "loss": 0.0002, + "num_input_tokens_seen": 8993328, + "step": 18275 + }, + { + "epoch": 2.4125643394483305, + "grad_norm": 0.03319350630044937, + "learning_rate": 1.23351703826149e-06, + "loss": 0.0004, + "num_input_tokens_seen": 8995632, + "step": 18280 + }, + { + "epoch": 2.4132242312260788, + "grad_norm": 0.022699033841490746, + "learning_rate": 1.2330690524258618e-06, + "loss": 0.046, + "num_input_tokens_seen": 8998256, + "step": 18285 + }, + { + "epoch": 2.4138841230038275, + "grad_norm": 0.07350515574216843, + "learning_rate": 1.2326210171230152e-06, + "loss": 0.1114, + "num_input_tokens_seen": 9000368, + "step": 18290 + }, + { + "epoch": 2.4145440147815758, + "grad_norm": 0.34410330653190613, + "learning_rate": 1.2321729324480422e-06, + "loss": 0.0758, + "num_input_tokens_seen": 9002800, + "step": 18295 + }, + { + "epoch": 2.4152039065593245, + "grad_norm": 15.627949714660645, + "learning_rate": 1.2317247984960455e-06, + "loss": 0.0675, + "num_input_tokens_seen": 9005232, + "step": 18300 + }, + { + "epoch": 2.4158637983370728, + "grad_norm": 0.03578624129295349, + "learning_rate": 1.2312766153621383e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9007920, + "step": 18305 + }, + { + "epoch": 2.416523690114821, + "grad_norm": 0.06478110700845718, + "learning_rate": 1.2308283831414444e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9010416, + "step": 18310 + }, + { + "epoch": 2.4171835818925698, + "grad_norm": 0.13254325091838837, + "learning_rate": 1.2303801019290978e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9013168, + "step": 18315 + }, + { + "epoch": 2.417843473670318, + "grad_norm": 0.39260151982307434, + "learning_rate": 1.2299317718202424e-06, + "loss": 0.0476, + "num_input_tokens_seen": 9015728, + "step": 18320 + }, + { + "epoch": 2.4185033654480668, + "grad_norm": 0.05883554369211197, + "learning_rate": 1.229483392910034e-06, + "loss": 0.0326, + "num_input_tokens_seen": 9018416, + "step": 18325 + }, + { + "epoch": 2.419163257225815, + "grad_norm": 0.02526324987411499, + "learning_rate": 1.229034965293637e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9020976, + "step": 18330 + }, + { + "epoch": 2.4198231490035633, + "grad_norm": 0.006224233657121658, + "learning_rate": 1.2285864890662272e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9023728, + "step": 18335 + }, + { + "epoch": 2.420483040781312, + "grad_norm": 0.03532911092042923, + "learning_rate": 1.2281379643229904e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9026096, + "step": 18340 + }, + { + "epoch": 2.4211429325590603, + "grad_norm": 25.137794494628906, + "learning_rate": 1.2276893911591226e-06, + "loss": 0.0509, + "num_input_tokens_seen": 9028656, + "step": 18345 + }, + { + "epoch": 2.4218028243368086, + "grad_norm": 0.00828655157238245, + "learning_rate": 1.2272407696698303e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9031472, + "step": 18350 + }, + { + "epoch": 2.4224627161145573, + "grad_norm": 0.13335035741329193, + "learning_rate": 1.2267920999503302e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9034352, + "step": 18355 + }, + { + "epoch": 2.4231226078923056, + "grad_norm": 0.013290558941662312, + "learning_rate": 1.2263433820958494e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9036720, + "step": 18360 + }, + { + "epoch": 2.423782499670054, + "grad_norm": 0.19421987235546112, + "learning_rate": 1.2258946162016247e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9039216, + "step": 18365 + }, + { + "epoch": 2.4244423914478026, + "grad_norm": 23.98882293701172, + "learning_rate": 1.2254458023629035e-06, + "loss": 0.0595, + "num_input_tokens_seen": 9041584, + "step": 18370 + }, + { + "epoch": 2.425102283225551, + "grad_norm": 0.0030773465987294912, + "learning_rate": 1.2249969406749432e-06, + "loss": 0.0442, + "num_input_tokens_seen": 9043888, + "step": 18375 + }, + { + "epoch": 2.4257621750032996, + "grad_norm": 0.006509008351713419, + "learning_rate": 1.2245480312330117e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9046320, + "step": 18380 + }, + { + "epoch": 2.426422066781048, + "grad_norm": 0.011278158985078335, + "learning_rate": 1.2240990741323867e-06, + "loss": 0.0675, + "num_input_tokens_seen": 9048880, + "step": 18385 + }, + { + "epoch": 2.427081958558796, + "grad_norm": 0.00435879360884428, + "learning_rate": 1.2236500694683555e-06, + "loss": 0.0, + "num_input_tokens_seen": 9051312, + "step": 18390 + }, + { + "epoch": 2.427741850336545, + "grad_norm": 0.002087733941152692, + "learning_rate": 1.223201017336217e-06, + "loss": 0.1489, + "num_input_tokens_seen": 9054000, + "step": 18395 + }, + { + "epoch": 2.428401742114293, + "grad_norm": 0.0005177515558898449, + "learning_rate": 1.222751917831279e-06, + "loss": 0.001, + "num_input_tokens_seen": 9056240, + "step": 18400 + }, + { + "epoch": 2.429061633892042, + "grad_norm": 0.002716792980208993, + "learning_rate": 1.2223027710488591e-06, + "loss": 0.1132, + "num_input_tokens_seen": 9058672, + "step": 18405 + }, + { + "epoch": 2.42972152566979, + "grad_norm": 0.032944511622190475, + "learning_rate": 1.221853577084286e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9061104, + "step": 18410 + }, + { + "epoch": 2.4303814174475384, + "grad_norm": 0.03812925145030022, + "learning_rate": 1.221404336032898e-06, + "loss": 0.1807, + "num_input_tokens_seen": 9063280, + "step": 18415 + }, + { + "epoch": 2.431041309225287, + "grad_norm": 0.022235559299588203, + "learning_rate": 1.2209550479900425e-06, + "loss": 0.0442, + "num_input_tokens_seen": 9065840, + "step": 18420 + }, + { + "epoch": 2.4317012010030354, + "grad_norm": 0.004477641079574823, + "learning_rate": 1.2205057130510783e-06, + "loss": 0.0, + "num_input_tokens_seen": 9068016, + "step": 18425 + }, + { + "epoch": 2.432361092780784, + "grad_norm": 0.01159152202308178, + "learning_rate": 1.2200563313113732e-06, + "loss": 0.0813, + "num_input_tokens_seen": 9070448, + "step": 18430 + }, + { + "epoch": 2.4330209845585324, + "grad_norm": 0.01895478554069996, + "learning_rate": 1.2196069028663057e-06, + "loss": 0.0473, + "num_input_tokens_seen": 9072880, + "step": 18435 + }, + { + "epoch": 2.4336808763362807, + "grad_norm": 0.10171425342559814, + "learning_rate": 1.219157427811263e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9075248, + "step": 18440 + }, + { + "epoch": 2.4343407681140294, + "grad_norm": 22.22634506225586, + "learning_rate": 1.218707906241643e-06, + "loss": 0.0627, + "num_input_tokens_seen": 9077744, + "step": 18445 + }, + { + "epoch": 2.4350006598917777, + "grad_norm": 0.030533935874700546, + "learning_rate": 1.2182583382528543e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9080560, + "step": 18450 + }, + { + "epoch": 2.4356605516695264, + "grad_norm": 0.002572020050138235, + "learning_rate": 1.2178087239403133e-06, + "loss": 0.0109, + "num_input_tokens_seen": 9082992, + "step": 18455 + }, + { + "epoch": 2.4363204434472747, + "grad_norm": 0.0030550749506801367, + "learning_rate": 1.2173590633994479e-06, + "loss": 0.0844, + "num_input_tokens_seen": 9085552, + "step": 18460 + }, + { + "epoch": 2.436980335225023, + "grad_norm": 0.007434781640768051, + "learning_rate": 1.2169093567256955e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9087728, + "step": 18465 + }, + { + "epoch": 2.4376402270027717, + "grad_norm": 0.05250757560133934, + "learning_rate": 1.2164596040145028e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9089968, + "step": 18470 + }, + { + "epoch": 2.43830011878052, + "grad_norm": 0.003924153745174408, + "learning_rate": 1.2160098053613267e-06, + "loss": 0.0011, + "num_input_tokens_seen": 9092464, + "step": 18475 + }, + { + "epoch": 2.4389600105582683, + "grad_norm": 0.022841813042759895, + "learning_rate": 1.2155599608616331e-06, + "loss": 0.0345, + "num_input_tokens_seen": 9095088, + "step": 18480 + }, + { + "epoch": 2.439619902336017, + "grad_norm": 0.45985811948776245, + "learning_rate": 1.2151100706108996e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9097712, + "step": 18485 + }, + { + "epoch": 2.4402797941137653, + "grad_norm": 19.873098373413086, + "learning_rate": 1.2146601347046107e-06, + "loss": 0.0643, + "num_input_tokens_seen": 9100336, + "step": 18490 + }, + { + "epoch": 2.4409396858915136, + "grad_norm": 0.10789693892002106, + "learning_rate": 1.214210153238263e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9103024, + "step": 18495 + }, + { + "epoch": 2.4415995776692623, + "grad_norm": 0.06011528521776199, + "learning_rate": 1.2137601263073613e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9105520, + "step": 18500 + }, + { + "epoch": 2.4422594694470106, + "grad_norm": 0.0035535397473722696, + "learning_rate": 1.2133100540074206e-06, + "loss": 0.0689, + "num_input_tokens_seen": 9108016, + "step": 18505 + }, + { + "epoch": 2.4429193612247593, + "grad_norm": 0.05881497263908386, + "learning_rate": 1.2128599364339663e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9110320, + "step": 18510 + }, + { + "epoch": 2.4435792530025076, + "grad_norm": 0.047487806528806686, + "learning_rate": 1.212409773682531e-06, + "loss": 0.0113, + "num_input_tokens_seen": 9112624, + "step": 18515 + }, + { + "epoch": 2.444239144780256, + "grad_norm": 0.05220465362071991, + "learning_rate": 1.2119595658486599e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9115120, + "step": 18520 + }, + { + "epoch": 2.4448990365580046, + "grad_norm": 24.626712799072266, + "learning_rate": 1.2115093130279055e-06, + "loss": 0.111, + "num_input_tokens_seen": 9117680, + "step": 18525 + }, + { + "epoch": 2.445558928335753, + "grad_norm": 0.04938157647848129, + "learning_rate": 1.2110590153158313e-06, + "loss": 0.1346, + "num_input_tokens_seen": 9120112, + "step": 18530 + }, + { + "epoch": 2.4462188201135016, + "grad_norm": 0.06872491538524628, + "learning_rate": 1.2106086728080095e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9122800, + "step": 18535 + }, + { + "epoch": 2.44687871189125, + "grad_norm": 0.008710040710866451, + "learning_rate": 1.2101582856000219e-06, + "loss": 0.0473, + "num_input_tokens_seen": 9124976, + "step": 18540 + }, + { + "epoch": 2.447538603668998, + "grad_norm": 0.006348209455609322, + "learning_rate": 1.20970785378746e-06, + "loss": 0.0337, + "num_input_tokens_seen": 9127472, + "step": 18545 + }, + { + "epoch": 2.448198495446747, + "grad_norm": 0.1652994155883789, + "learning_rate": 1.2092573774659247e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9130288, + "step": 18550 + }, + { + "epoch": 2.448858387224495, + "grad_norm": 0.16471163928508759, + "learning_rate": 1.2088068567310266e-06, + "loss": 0.0011, + "num_input_tokens_seen": 9132528, + "step": 18555 + }, + { + "epoch": 2.449518279002244, + "grad_norm": 0.025540076196193695, + "learning_rate": 1.2083562916783852e-06, + "loss": 0.197, + "num_input_tokens_seen": 9135152, + "step": 18560 + }, + { + "epoch": 2.450178170779992, + "grad_norm": 0.6089839935302734, + "learning_rate": 1.2079056824036294e-06, + "loss": 0.088, + "num_input_tokens_seen": 9138032, + "step": 18565 + }, + { + "epoch": 2.4508380625577404, + "grad_norm": 0.03979070857167244, + "learning_rate": 1.207455029002398e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9140528, + "step": 18570 + }, + { + "epoch": 2.451497954335489, + "grad_norm": 0.30008092522621155, + "learning_rate": 1.207004331570339e-06, + "loss": 0.0495, + "num_input_tokens_seen": 9142768, + "step": 18575 + }, + { + "epoch": 2.4521578461132374, + "grad_norm": 0.15778286755084991, + "learning_rate": 1.2065535902031098e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9145392, + "step": 18580 + }, + { + "epoch": 2.452817737890986, + "grad_norm": 25.180299758911133, + "learning_rate": 1.206102804996377e-06, + "loss": 0.1586, + "num_input_tokens_seen": 9147696, + "step": 18585 + }, + { + "epoch": 2.4534776296687344, + "grad_norm": 82.59857940673828, + "learning_rate": 1.2056519760458162e-06, + "loss": 0.0845, + "num_input_tokens_seen": 9150320, + "step": 18590 + }, + { + "epoch": 2.4541375214464827, + "grad_norm": 0.2946361303329468, + "learning_rate": 1.2052011034471123e-06, + "loss": 0.0212, + "num_input_tokens_seen": 9153072, + "step": 18595 + }, + { + "epoch": 2.4547974132242314, + "grad_norm": 0.18923722207546234, + "learning_rate": 1.2047501872959606e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9155504, + "step": 18600 + }, + { + "epoch": 2.4554573050019797, + "grad_norm": 0.07883734256029129, + "learning_rate": 1.204299227688064e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9158064, + "step": 18605 + }, + { + "epoch": 2.456117196779728, + "grad_norm": 0.01335981860756874, + "learning_rate": 1.203848224719136e-06, + "loss": 0.1095, + "num_input_tokens_seen": 9160688, + "step": 18610 + }, + { + "epoch": 2.4567770885574767, + "grad_norm": 0.035853851586580276, + "learning_rate": 1.2033971784848985e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9163056, + "step": 18615 + }, + { + "epoch": 2.457436980335225, + "grad_norm": 0.4601845443248749, + "learning_rate": 1.2029460890810826e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9165680, + "step": 18620 + }, + { + "epoch": 2.4580968721129732, + "grad_norm": 0.09914588928222656, + "learning_rate": 1.202494956603429e-06, + "loss": 0.0215, + "num_input_tokens_seen": 9167984, + "step": 18625 + }, + { + "epoch": 2.458756763890722, + "grad_norm": 0.016405778005719185, + "learning_rate": 1.2020437811476872e-06, + "loss": 0.0013, + "num_input_tokens_seen": 9170608, + "step": 18630 + }, + { + "epoch": 2.4594166556684702, + "grad_norm": 0.012795671820640564, + "learning_rate": 1.2015925628096157e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9172976, + "step": 18635 + }, + { + "epoch": 2.460076547446219, + "grad_norm": 0.01026566606014967, + "learning_rate": 1.2011413016849829e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9175600, + "step": 18640 + }, + { + "epoch": 2.4607364392239672, + "grad_norm": 0.005955888889729977, + "learning_rate": 1.2006899978695653e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9177904, + "step": 18645 + }, + { + "epoch": 2.4613963310017155, + "grad_norm": 0.03424505889415741, + "learning_rate": 1.200238651459149e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9180400, + "step": 18650 + }, + { + "epoch": 2.4620562227794642, + "grad_norm": 0.004490535706281662, + "learning_rate": 1.1997872625495284e-06, + "loss": 0.0782, + "num_input_tokens_seen": 9182896, + "step": 18655 + }, + { + "epoch": 2.4627161145572125, + "grad_norm": 0.003114398568868637, + "learning_rate": 1.1993358312365087e-06, + "loss": 0.0021, + "num_input_tokens_seen": 9185328, + "step": 18660 + }, + { + "epoch": 2.4633760063349612, + "grad_norm": 0.12822800874710083, + "learning_rate": 1.198884357615902e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9187760, + "step": 18665 + }, + { + "epoch": 2.4640358981127095, + "grad_norm": 0.0007764756446704268, + "learning_rate": 1.1984328417835307e-06, + "loss": 0.0844, + "num_input_tokens_seen": 9189872, + "step": 18670 + }, + { + "epoch": 2.464695789890458, + "grad_norm": 0.095713309943676, + "learning_rate": 1.1979812838352257e-06, + "loss": 0.1752, + "num_input_tokens_seen": 9192176, + "step": 18675 + }, + { + "epoch": 2.4653556816682065, + "grad_norm": 0.03003990463912487, + "learning_rate": 1.1975296838668266e-06, + "loss": 0.0296, + "num_input_tokens_seen": 9194480, + "step": 18680 + }, + { + "epoch": 2.466015573445955, + "grad_norm": 0.017723990604281425, + "learning_rate": 1.1970780419741828e-06, + "loss": 0.0712, + "num_input_tokens_seen": 9196976, + "step": 18685 + }, + { + "epoch": 2.4666754652237035, + "grad_norm": 0.031016312539577484, + "learning_rate": 1.1966263582531517e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9199216, + "step": 18690 + }, + { + "epoch": 2.467335357001452, + "grad_norm": 0.030972057953476906, + "learning_rate": 1.1961746327996e-06, + "loss": 0.0064, + "num_input_tokens_seen": 9201648, + "step": 18695 + }, + { + "epoch": 2.4679952487792, + "grad_norm": 13.240409851074219, + "learning_rate": 1.1957228657094027e-06, + "loss": 0.0253, + "num_input_tokens_seen": 9203760, + "step": 18700 + }, + { + "epoch": 2.468655140556949, + "grad_norm": 0.012332662008702755, + "learning_rate": 1.1952710570784447e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9206000, + "step": 18705 + }, + { + "epoch": 2.469315032334697, + "grad_norm": 14.274161338806152, + "learning_rate": 1.194819207002619e-06, + "loss": 0.1172, + "num_input_tokens_seen": 9208496, + "step": 18710 + }, + { + "epoch": 2.469974924112446, + "grad_norm": 0.16916923224925995, + "learning_rate": 1.194367315577827e-06, + "loss": 0.1238, + "num_input_tokens_seen": 9211120, + "step": 18715 + }, + { + "epoch": 2.470634815890194, + "grad_norm": 18.33597755432129, + "learning_rate": 1.1939153828999801e-06, + "loss": 0.1403, + "num_input_tokens_seen": 9213744, + "step": 18720 + }, + { + "epoch": 2.4712947076679423, + "grad_norm": 0.18467681109905243, + "learning_rate": 1.1934634090649973e-06, + "loss": 0.0015, + "num_input_tokens_seen": 9215856, + "step": 18725 + }, + { + "epoch": 2.471954599445691, + "grad_norm": 0.28404808044433594, + "learning_rate": 1.1930113941688072e-06, + "loss": 0.1295, + "num_input_tokens_seen": 9218160, + "step": 18730 + }, + { + "epoch": 2.4726144912234393, + "grad_norm": 1.6605956554412842, + "learning_rate": 1.1925593383073458e-06, + "loss": 0.0516, + "num_input_tokens_seen": 9220848, + "step": 18735 + }, + { + "epoch": 2.4732743830011876, + "grad_norm": 19.45949935913086, + "learning_rate": 1.1921072415765595e-06, + "loss": 0.0631, + "num_input_tokens_seen": 9223344, + "step": 18740 + }, + { + "epoch": 2.4739342747789363, + "grad_norm": 0.016054624691605568, + "learning_rate": 1.1916551040724026e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9225648, + "step": 18745 + }, + { + "epoch": 2.4745941665566846, + "grad_norm": 0.03807045519351959, + "learning_rate": 1.191202925890837e-06, + "loss": 0.0413, + "num_input_tokens_seen": 9228016, + "step": 18750 + }, + { + "epoch": 2.475254058334433, + "grad_norm": 0.28098002076148987, + "learning_rate": 1.1907507071278358e-06, + "loss": 0.0693, + "num_input_tokens_seen": 9230192, + "step": 18755 + }, + { + "epoch": 2.4759139501121816, + "grad_norm": 0.03773991018533707, + "learning_rate": 1.1902984478793776e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9232624, + "step": 18760 + }, + { + "epoch": 2.47657384188993, + "grad_norm": 14.075786590576172, + "learning_rate": 1.1898461482414524e-06, + "loss": 0.0532, + "num_input_tokens_seen": 9234992, + "step": 18765 + }, + { + "epoch": 2.4772337336676786, + "grad_norm": 0.1268509477376938, + "learning_rate": 1.1893938083100568e-06, + "loss": 0.0014, + "num_input_tokens_seen": 9237360, + "step": 18770 + }, + { + "epoch": 2.477893625445427, + "grad_norm": 0.7259445190429688, + "learning_rate": 1.188941428181197e-06, + "loss": 0.0253, + "num_input_tokens_seen": 9239664, + "step": 18775 + }, + { + "epoch": 2.478553517223175, + "grad_norm": 1.0823359489440918, + "learning_rate": 1.188489007950887e-06, + "loss": 0.0016, + "num_input_tokens_seen": 9241968, + "step": 18780 + }, + { + "epoch": 2.479213409000924, + "grad_norm": 0.007959727197885513, + "learning_rate": 1.1880365477151501e-06, + "loss": 0.0, + "num_input_tokens_seen": 9244336, + "step": 18785 + }, + { + "epoch": 2.479873300778672, + "grad_norm": 0.006146569736301899, + "learning_rate": 1.1875840475700175e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9246960, + "step": 18790 + }, + { + "epoch": 2.480533192556421, + "grad_norm": 0.0030527012422680855, + "learning_rate": 1.1871315076115293e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9249264, + "step": 18795 + }, + { + "epoch": 2.481193084334169, + "grad_norm": 0.03815356269478798, + "learning_rate": 1.186678927935734e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9251504, + "step": 18800 + }, + { + "epoch": 2.4818529761119175, + "grad_norm": 0.0036139509174972773, + "learning_rate": 1.1862263086386875e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9253552, + "step": 18805 + }, + { + "epoch": 2.482512867889666, + "grad_norm": 0.17686735093593597, + "learning_rate": 1.1857736498164559e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9255984, + "step": 18810 + }, + { + "epoch": 2.4831727596674145, + "grad_norm": 0.08808859437704086, + "learning_rate": 1.1853209515651122e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9258352, + "step": 18815 + }, + { + "epoch": 2.483832651445163, + "grad_norm": 0.0034736869856715202, + "learning_rate": 1.1848682139807387e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9260784, + "step": 18820 + }, + { + "epoch": 2.4844925432229115, + "grad_norm": 0.0006168729742057621, + "learning_rate": 1.1844154371594254e-06, + "loss": 0.0008, + "num_input_tokens_seen": 9263408, + "step": 18825 + }, + { + "epoch": 2.4851524350006597, + "grad_norm": 0.007578112650662661, + "learning_rate": 1.183962621197271e-06, + "loss": 0.0673, + "num_input_tokens_seen": 9265584, + "step": 18830 + }, + { + "epoch": 2.4858123267784085, + "grad_norm": 24.65241050720215, + "learning_rate": 1.1835097661903826e-06, + "loss": 0.1876, + "num_input_tokens_seen": 9267888, + "step": 18835 + }, + { + "epoch": 2.4864722185561567, + "grad_norm": 0.027628377079963684, + "learning_rate": 1.1830568722348748e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9270256, + "step": 18840 + }, + { + "epoch": 2.4871321103339055, + "grad_norm": 0.005967118311673403, + "learning_rate": 1.182603939426872e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9272944, + "step": 18845 + }, + { + "epoch": 2.4877920021116537, + "grad_norm": 0.11982940882444382, + "learning_rate": 1.1821509678625048e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9275376, + "step": 18850 + }, + { + "epoch": 2.488451893889402, + "grad_norm": 11.578821182250977, + "learning_rate": 1.181697957637914e-06, + "loss": 0.0368, + "num_input_tokens_seen": 9277680, + "step": 18855 + }, + { + "epoch": 2.4891117856671507, + "grad_norm": 33.48732376098633, + "learning_rate": 1.1812449088492474e-06, + "loss": 0.2397, + "num_input_tokens_seen": 9280112, + "step": 18860 + }, + { + "epoch": 2.489771677444899, + "grad_norm": 0.14362753927707672, + "learning_rate": 1.1807918215926614e-06, + "loss": 0.0019, + "num_input_tokens_seen": 9282544, + "step": 18865 + }, + { + "epoch": 2.4904315692226473, + "grad_norm": 14.686333656311035, + "learning_rate": 1.1803386959643204e-06, + "loss": 0.1361, + "num_input_tokens_seen": 9284976, + "step": 18870 + }, + { + "epoch": 2.491091461000396, + "grad_norm": 0.007154212798923254, + "learning_rate": 1.179885532060397e-06, + "loss": 0.0009, + "num_input_tokens_seen": 9287472, + "step": 18875 + }, + { + "epoch": 2.4917513527781443, + "grad_norm": 0.23903542757034302, + "learning_rate": 1.1794323299770724e-06, + "loss": 0.0829, + "num_input_tokens_seen": 9290096, + "step": 18880 + }, + { + "epoch": 2.492411244555893, + "grad_norm": 0.03929462283849716, + "learning_rate": 1.1789790898105346e-06, + "loss": 0.0012, + "num_input_tokens_seen": 9292464, + "step": 18885 + }, + { + "epoch": 2.4930711363336413, + "grad_norm": 0.009414401836693287, + "learning_rate": 1.1785258116569816e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9294896, + "step": 18890 + }, + { + "epoch": 2.4937310281113896, + "grad_norm": 0.06671901047229767, + "learning_rate": 1.1780724956126173e-06, + "loss": 0.0015, + "num_input_tokens_seen": 9297456, + "step": 18895 + }, + { + "epoch": 2.4943909198891383, + "grad_norm": 26.247026443481445, + "learning_rate": 1.1776191417736558e-06, + "loss": 0.1238, + "num_input_tokens_seen": 9299888, + "step": 18900 + }, + { + "epoch": 2.4950508116668866, + "grad_norm": 24.444671630859375, + "learning_rate": 1.1771657502363175e-06, + "loss": 0.0385, + "num_input_tokens_seen": 9302640, + "step": 18905 + }, + { + "epoch": 2.495710703444635, + "grad_norm": 0.034380197525024414, + "learning_rate": 1.1767123210968315e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9305392, + "step": 18910 + }, + { + "epoch": 2.4963705952223836, + "grad_norm": 0.05007326230406761, + "learning_rate": 1.1762588544514352e-06, + "loss": 0.0991, + "num_input_tokens_seen": 9307888, + "step": 18915 + }, + { + "epoch": 2.497030487000132, + "grad_norm": 0.036398421972990036, + "learning_rate": 1.1758053503963733e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9310192, + "step": 18920 + }, + { + "epoch": 2.4976903787778806, + "grad_norm": 15.678851127624512, + "learning_rate": 1.1753518090278991e-06, + "loss": 0.1931, + "num_input_tokens_seen": 9312688, + "step": 18925 + }, + { + "epoch": 2.498350270555629, + "grad_norm": 0.07849349826574326, + "learning_rate": 1.1748982304422729e-06, + "loss": 0.0463, + "num_input_tokens_seen": 9315248, + "step": 18930 + }, + { + "epoch": 2.499010162333377, + "grad_norm": 0.720598578453064, + "learning_rate": 1.174444614735764e-06, + "loss": 0.0443, + "num_input_tokens_seen": 9318000, + "step": 18935 + }, + { + "epoch": 2.499670054111126, + "grad_norm": 0.45224106311798096, + "learning_rate": 1.1739909620046485e-06, + "loss": 0.0663, + "num_input_tokens_seen": 9320752, + "step": 18940 + }, + { + "epoch": 2.500329945888874, + "grad_norm": 26.452760696411133, + "learning_rate": 1.1735372723452114e-06, + "loss": 0.0352, + "num_input_tokens_seen": 9323632, + "step": 18945 + }, + { + "epoch": 2.500989837666623, + "grad_norm": 0.135438933968544, + "learning_rate": 1.1730835458537454e-06, + "loss": 0.0962, + "num_input_tokens_seen": 9326256, + "step": 18950 + }, + { + "epoch": 2.500989837666623, + "eval_loss": 0.11418119072914124, + "eval_runtime": 7.92, + "eval_samples_per_second": 850.38, + "eval_steps_per_second": 106.313, + "num_input_tokens_seen": 9326256, + "step": 18950 + }, + { + "epoch": 2.501649729444371, + "grad_norm": 0.07885482162237167, + "learning_rate": 1.1726297826265497e-06, + "loss": 0.0332, + "num_input_tokens_seen": 9328688, + "step": 18955 + }, + { + "epoch": 2.5023096212221194, + "grad_norm": 17.58693504333496, + "learning_rate": 1.1721759827599326e-06, + "loss": 0.0693, + "num_input_tokens_seen": 9331312, + "step": 18960 + }, + { + "epoch": 2.502969512999868, + "grad_norm": 24.822282791137695, + "learning_rate": 1.1717221463502102e-06, + "loss": 0.0561, + "num_input_tokens_seen": 9333872, + "step": 18965 + }, + { + "epoch": 2.5036294047776164, + "grad_norm": 0.05071251094341278, + "learning_rate": 1.1712682734937058e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9336176, + "step": 18970 + }, + { + "epoch": 2.504289296555365, + "grad_norm": 0.4167310297489166, + "learning_rate": 1.1708143642867506e-06, + "loss": 0.0008, + "num_input_tokens_seen": 9338800, + "step": 18975 + }, + { + "epoch": 2.5049491883331134, + "grad_norm": 0.021511459723114967, + "learning_rate": 1.1703604188256833e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9341232, + "step": 18980 + }, + { + "epoch": 2.5056090801108617, + "grad_norm": 24.896465301513672, + "learning_rate": 1.169906437206851e-06, + "loss": 0.0045, + "num_input_tokens_seen": 9343664, + "step": 18985 + }, + { + "epoch": 2.5062689718886104, + "grad_norm": 0.0010555103654041886, + "learning_rate": 1.1694524195266077e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9346096, + "step": 18990 + }, + { + "epoch": 2.5069288636663587, + "grad_norm": 24.33745765686035, + "learning_rate": 1.1689983658813152e-06, + "loss": 0.1795, + "num_input_tokens_seen": 9348592, + "step": 18995 + }, + { + "epoch": 2.5075887554441074, + "grad_norm": 0.8176755905151367, + "learning_rate": 1.1685442763673436e-06, + "loss": 0.0014, + "num_input_tokens_seen": 9351088, + "step": 19000 + }, + { + "epoch": 2.5082486472218557, + "grad_norm": 0.24280667304992676, + "learning_rate": 1.16809015108107e-06, + "loss": 0.0008, + "num_input_tokens_seen": 9353648, + "step": 19005 + }, + { + "epoch": 2.508908538999604, + "grad_norm": 0.06780319660902023, + "learning_rate": 1.1676359901188785e-06, + "loss": 0.0019, + "num_input_tokens_seen": 9356208, + "step": 19010 + }, + { + "epoch": 2.5095684307773523, + "grad_norm": 0.2860470116138458, + "learning_rate": 1.1671817935771623e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9358320, + "step": 19015 + }, + { + "epoch": 2.510228322555101, + "grad_norm": 0.018246835097670555, + "learning_rate": 1.166727561552321e-06, + "loss": 0.046, + "num_input_tokens_seen": 9360688, + "step": 19020 + }, + { + "epoch": 2.5108882143328493, + "grad_norm": 0.017531786113977432, + "learning_rate": 1.1662732941407625e-06, + "loss": 0.0188, + "num_input_tokens_seen": 9363248, + "step": 19025 + }, + { + "epoch": 2.511548106110598, + "grad_norm": 0.004202735144644976, + "learning_rate": 1.165818991438901e-06, + "loss": 0.0165, + "num_input_tokens_seen": 9365872, + "step": 19030 + }, + { + "epoch": 2.5122079978883463, + "grad_norm": 0.927355170249939, + "learning_rate": 1.1653646535431593e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9368368, + "step": 19035 + }, + { + "epoch": 2.5128678896660945, + "grad_norm": 0.005749446805566549, + "learning_rate": 1.1649102805499676e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9371056, + "step": 19040 + }, + { + "epoch": 2.5135277814438433, + "grad_norm": 0.00678770337253809, + "learning_rate": 1.1644558725557627e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9373680, + "step": 19045 + }, + { + "epoch": 2.5141876732215915, + "grad_norm": 0.010611538775265217, + "learning_rate": 1.16400142965699e-06, + "loss": 0.0, + "num_input_tokens_seen": 9375920, + "step": 19050 + }, + { + "epoch": 2.5148475649993403, + "grad_norm": 16.00409507751465, + "learning_rate": 1.1635469519501015e-06, + "loss": 0.1645, + "num_input_tokens_seen": 9378608, + "step": 19055 + }, + { + "epoch": 2.5155074567770885, + "grad_norm": 0.06570476293563843, + "learning_rate": 1.1630924395315565e-06, + "loss": 0.0565, + "num_input_tokens_seen": 9380784, + "step": 19060 + }, + { + "epoch": 2.516167348554837, + "grad_norm": 13.64858627319336, + "learning_rate": 1.1626378924978223e-06, + "loss": 0.0412, + "num_input_tokens_seen": 9383216, + "step": 19065 + }, + { + "epoch": 2.5168272403325855, + "grad_norm": 0.13996559381484985, + "learning_rate": 1.1621833109453734e-06, + "loss": 0.0311, + "num_input_tokens_seen": 9385712, + "step": 19070 + }, + { + "epoch": 2.517487132110334, + "grad_norm": 0.0008270741673186421, + "learning_rate": 1.161728694970691e-06, + "loss": 0.0527, + "num_input_tokens_seen": 9387888, + "step": 19075 + }, + { + "epoch": 2.5181470238880825, + "grad_norm": 0.006990394555032253, + "learning_rate": 1.1612740446702645e-06, + "loss": 0.0464, + "num_input_tokens_seen": 9390192, + "step": 19080 + }, + { + "epoch": 2.518806915665831, + "grad_norm": 0.022595075890421867, + "learning_rate": 1.1608193601405894e-06, + "loss": 0.0352, + "num_input_tokens_seen": 9392816, + "step": 19085 + }, + { + "epoch": 2.519466807443579, + "grad_norm": 0.01655641384422779, + "learning_rate": 1.1603646414781701e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9395184, + "step": 19090 + }, + { + "epoch": 2.520126699221328, + "grad_norm": 0.007066233549267054, + "learning_rate": 1.1599098887795164e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9397232, + "step": 19095 + }, + { + "epoch": 2.520786590999076, + "grad_norm": 0.280401349067688, + "learning_rate": 1.1594551021411473e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9399664, + "step": 19100 + }, + { + "epoch": 2.521446482776825, + "grad_norm": 0.020977843552827835, + "learning_rate": 1.1590002816595874e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9401968, + "step": 19105 + }, + { + "epoch": 2.522106374554573, + "grad_norm": 0.0056993900798261166, + "learning_rate": 1.158545427431369e-06, + "loss": 0.1127, + "num_input_tokens_seen": 9404400, + "step": 19110 + }, + { + "epoch": 2.5227662663323214, + "grad_norm": 0.005368967540562153, + "learning_rate": 1.1580905395530317e-06, + "loss": 0.073, + "num_input_tokens_seen": 9406832, + "step": 19115 + }, + { + "epoch": 2.52342615811007, + "grad_norm": 0.3067415952682495, + "learning_rate": 1.1576356181211223e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9409264, + "step": 19120 + }, + { + "epoch": 2.5240860498878184, + "grad_norm": 15.489126205444336, + "learning_rate": 1.1571806632321941e-06, + "loss": 0.1069, + "num_input_tokens_seen": 9411824, + "step": 19125 + }, + { + "epoch": 2.524745941665567, + "grad_norm": 0.005901966709643602, + "learning_rate": 1.1567256749828088e-06, + "loss": 0.0614, + "num_input_tokens_seen": 9414320, + "step": 19130 + }, + { + "epoch": 2.5254058334433154, + "grad_norm": 0.425497442483902, + "learning_rate": 1.1562706534695337e-06, + "loss": 0.0487, + "num_input_tokens_seen": 9416688, + "step": 19135 + }, + { + "epoch": 2.5260657252210637, + "grad_norm": 0.0033785065170377493, + "learning_rate": 1.1558155987889437e-06, + "loss": 0.0504, + "num_input_tokens_seen": 9419120, + "step": 19140 + }, + { + "epoch": 2.526725616998812, + "grad_norm": 0.009927736595273018, + "learning_rate": 1.1553605110376216e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9421552, + "step": 19145 + }, + { + "epoch": 2.5273855087765607, + "grad_norm": 0.03838609904050827, + "learning_rate": 1.154905390312156e-06, + "loss": 0.0725, + "num_input_tokens_seen": 9424112, + "step": 19150 + }, + { + "epoch": 2.528045400554309, + "grad_norm": 0.012181775644421577, + "learning_rate": 1.1544502367091428e-06, + "loss": 0.0382, + "num_input_tokens_seen": 9426352, + "step": 19155 + }, + { + "epoch": 2.5287052923320577, + "grad_norm": 0.20829293131828308, + "learning_rate": 1.1539950503251858e-06, + "loss": 0.0905, + "num_input_tokens_seen": 9429040, + "step": 19160 + }, + { + "epoch": 2.529365184109806, + "grad_norm": 0.013603360392153263, + "learning_rate": 1.153539831256894e-06, + "loss": 0.0015, + "num_input_tokens_seen": 9431280, + "step": 19165 + }, + { + "epoch": 2.530025075887554, + "grad_norm": 0.010468337684869766, + "learning_rate": 1.1530845796008853e-06, + "loss": 0.023, + "num_input_tokens_seen": 9433648, + "step": 19170 + }, + { + "epoch": 2.530684967665303, + "grad_norm": 0.08018022030591965, + "learning_rate": 1.1526292954537827e-06, + "loss": 0.0277, + "num_input_tokens_seen": 9436400, + "step": 19175 + }, + { + "epoch": 2.531344859443051, + "grad_norm": 0.005076752509921789, + "learning_rate": 1.1521739789122179e-06, + "loss": 0.0384, + "num_input_tokens_seen": 9438896, + "step": 19180 + }, + { + "epoch": 2.5320047512208, + "grad_norm": 0.0038340799510478973, + "learning_rate": 1.1517186300728276e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9441136, + "step": 19185 + }, + { + "epoch": 2.532664642998548, + "grad_norm": 0.018121426925063133, + "learning_rate": 1.151263249032257e-06, + "loss": 0.0023, + "num_input_tokens_seen": 9443376, + "step": 19190 + }, + { + "epoch": 2.5333245347762965, + "grad_norm": 40.5118522644043, + "learning_rate": 1.150807835887157e-06, + "loss": 0.049, + "num_input_tokens_seen": 9445808, + "step": 19195 + }, + { + "epoch": 2.533984426554045, + "grad_norm": 8.46541976928711, + "learning_rate": 1.1503523907341858e-06, + "loss": 0.1468, + "num_input_tokens_seen": 9447984, + "step": 19200 + }, + { + "epoch": 2.5346443183317935, + "grad_norm": 1.6953380107879639, + "learning_rate": 1.1498969136700087e-06, + "loss": 0.0019, + "num_input_tokens_seen": 9450352, + "step": 19205 + }, + { + "epoch": 2.535304210109542, + "grad_norm": 1.2033361196517944, + "learning_rate": 1.1494414047912967e-06, + "loss": 0.0008, + "num_input_tokens_seen": 9452976, + "step": 19210 + }, + { + "epoch": 2.5359641018872905, + "grad_norm": 0.04662247374653816, + "learning_rate": 1.1489858641947292e-06, + "loss": 0.0011, + "num_input_tokens_seen": 9455536, + "step": 19215 + }, + { + "epoch": 2.5366239936650388, + "grad_norm": 0.015560769475996494, + "learning_rate": 1.1485302919769906e-06, + "loss": 0.0336, + "num_input_tokens_seen": 9458224, + "step": 19220 + }, + { + "epoch": 2.5372838854427875, + "grad_norm": 0.017624717205762863, + "learning_rate": 1.1480746882347733e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9460592, + "step": 19225 + }, + { + "epoch": 2.5379437772205358, + "grad_norm": 0.14374710619449615, + "learning_rate": 1.1476190530647754e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9463152, + "step": 19230 + }, + { + "epoch": 2.5386036689982845, + "grad_norm": 20.909866333007812, + "learning_rate": 1.1471633865637027e-06, + "loss": 0.077, + "num_input_tokens_seen": 9465712, + "step": 19235 + }, + { + "epoch": 2.5392635607760328, + "grad_norm": 0.1594572812318802, + "learning_rate": 1.146707688828267e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9468400, + "step": 19240 + }, + { + "epoch": 2.539923452553781, + "grad_norm": 0.0020154337398707867, + "learning_rate": 1.1462519599551864e-06, + "loss": 0.0, + "num_input_tokens_seen": 9470704, + "step": 19245 + }, + { + "epoch": 2.5405833443315298, + "grad_norm": 23.83881950378418, + "learning_rate": 1.1457962000411864e-06, + "loss": 0.2087, + "num_input_tokens_seen": 9472944, + "step": 19250 + }, + { + "epoch": 2.541243236109278, + "grad_norm": 0.028763171285390854, + "learning_rate": 1.1453404091829987e-06, + "loss": 0.0457, + "num_input_tokens_seen": 9475440, + "step": 19255 + }, + { + "epoch": 2.5419031278870268, + "grad_norm": 0.03985166549682617, + "learning_rate": 1.1448845874773623e-06, + "loss": 0.0014, + "num_input_tokens_seen": 9477808, + "step": 19260 + }, + { + "epoch": 2.542563019664775, + "grad_norm": 0.01088575180619955, + "learning_rate": 1.1444287350210208e-06, + "loss": 0.0019, + "num_input_tokens_seen": 9480368, + "step": 19265 + }, + { + "epoch": 2.5432229114425233, + "grad_norm": 0.1729097068309784, + "learning_rate": 1.143972851910726e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9482864, + "step": 19270 + }, + { + "epoch": 2.5438828032202716, + "grad_norm": 0.005866058170795441, + "learning_rate": 1.143516938243236e-06, + "loss": 0.0, + "num_input_tokens_seen": 9485488, + "step": 19275 + }, + { + "epoch": 2.5445426949980203, + "grad_norm": 0.011199753731489182, + "learning_rate": 1.1430609941153154e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9487856, + "step": 19280 + }, + { + "epoch": 2.545202586775769, + "grad_norm": 0.004434535745531321, + "learning_rate": 1.1426050196237347e-06, + "loss": 0.0015, + "num_input_tokens_seen": 9489968, + "step": 19285 + }, + { + "epoch": 2.5458624785535173, + "grad_norm": 0.005931300576776266, + "learning_rate": 1.142149014865271e-06, + "loss": 0.0698, + "num_input_tokens_seen": 9492464, + "step": 19290 + }, + { + "epoch": 2.5465223703312656, + "grad_norm": 0.021038591861724854, + "learning_rate": 1.1416929799367086e-06, + "loss": 0.0744, + "num_input_tokens_seen": 9495024, + "step": 19295 + }, + { + "epoch": 2.547182262109014, + "grad_norm": 0.06645817309617996, + "learning_rate": 1.141236914934837e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9497648, + "step": 19300 + }, + { + "epoch": 2.5478421538867626, + "grad_norm": 0.0028256825171411037, + "learning_rate": 1.1407808199564532e-06, + "loss": 0.0009, + "num_input_tokens_seen": 9499824, + "step": 19305 + }, + { + "epoch": 2.548502045664511, + "grad_norm": 0.021098587661981583, + "learning_rate": 1.1403246950983598e-06, + "loss": 0.1192, + "num_input_tokens_seen": 9502064, + "step": 19310 + }, + { + "epoch": 2.5491619374422596, + "grad_norm": 0.01978352852165699, + "learning_rate": 1.1398685404573657e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9504560, + "step": 19315 + }, + { + "epoch": 2.549821829220008, + "grad_norm": 0.009966706857085228, + "learning_rate": 1.139412356130287e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9507120, + "step": 19320 + }, + { + "epoch": 2.550481720997756, + "grad_norm": 0.09230407327413559, + "learning_rate": 1.138956142213945e-06, + "loss": 0.0831, + "num_input_tokens_seen": 9509552, + "step": 19325 + }, + { + "epoch": 2.551141612775505, + "grad_norm": 0.004419033881276846, + "learning_rate": 1.1384998988051684e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9512304, + "step": 19330 + }, + { + "epoch": 2.551801504553253, + "grad_norm": 0.12761980295181274, + "learning_rate": 1.1380436260007914e-06, + "loss": 0.0749, + "num_input_tokens_seen": 9514736, + "step": 19335 + }, + { + "epoch": 2.552461396331002, + "grad_norm": 0.16481366753578186, + "learning_rate": 1.1375873238976542e-06, + "loss": 0.0802, + "num_input_tokens_seen": 9517232, + "step": 19340 + }, + { + "epoch": 2.55312128810875, + "grad_norm": 0.0165677722543478, + "learning_rate": 1.1371309925926034e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9519472, + "step": 19345 + }, + { + "epoch": 2.5537811798864984, + "grad_norm": 0.014621448703110218, + "learning_rate": 1.1366746321824928e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9521776, + "step": 19350 + }, + { + "epoch": 2.554441071664247, + "grad_norm": 1.5919983386993408, + "learning_rate": 1.1362182427641812e-06, + "loss": 0.0014, + "num_input_tokens_seen": 9524208, + "step": 19355 + }, + { + "epoch": 2.5551009634419954, + "grad_norm": 0.025344735011458397, + "learning_rate": 1.135761824434534e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9526768, + "step": 19360 + }, + { + "epoch": 2.555760855219744, + "grad_norm": 0.0029130533803254366, + "learning_rate": 1.135305377290423e-06, + "loss": 0.0013, + "num_input_tokens_seen": 9529200, + "step": 19365 + }, + { + "epoch": 2.5564207469974924, + "grad_norm": 0.011325112544000149, + "learning_rate": 1.1348489014287248e-06, + "loss": 0.1098, + "num_input_tokens_seen": 9531824, + "step": 19370 + }, + { + "epoch": 2.5570806387752407, + "grad_norm": 0.012303993105888367, + "learning_rate": 1.1343923969463243e-06, + "loss": 0.0411, + "num_input_tokens_seen": 9534192, + "step": 19375 + }, + { + "epoch": 2.5577405305529894, + "grad_norm": 0.005361241288483143, + "learning_rate": 1.1339358639401103e-06, + "loss": 0.0, + "num_input_tokens_seen": 9536688, + "step": 19380 + }, + { + "epoch": 2.5584004223307377, + "grad_norm": 25.965045928955078, + "learning_rate": 1.1334793025069794e-06, + "loss": 0.0505, + "num_input_tokens_seen": 9539248, + "step": 19385 + }, + { + "epoch": 2.5590603141084864, + "grad_norm": 0.2334904968738556, + "learning_rate": 1.1330227127438332e-06, + "loss": 0.0818, + "num_input_tokens_seen": 9541936, + "step": 19390 + }, + { + "epoch": 2.5597202058862347, + "grad_norm": 0.0050776004791259766, + "learning_rate": 1.1325660947475792e-06, + "loss": 0.087, + "num_input_tokens_seen": 9544240, + "step": 19395 + }, + { + "epoch": 2.560380097663983, + "grad_norm": 0.06891787052154541, + "learning_rate": 1.1321094486151317e-06, + "loss": 0.0565, + "num_input_tokens_seen": 9546608, + "step": 19400 + }, + { + "epoch": 2.5610399894417313, + "grad_norm": 0.19221165776252747, + "learning_rate": 1.1316527744434104e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9549040, + "step": 19405 + }, + { + "epoch": 2.56169988121948, + "grad_norm": 0.02088336832821369, + "learning_rate": 1.131196072329341e-06, + "loss": 0.0177, + "num_input_tokens_seen": 9551792, + "step": 19410 + }, + { + "epoch": 2.5623597729972287, + "grad_norm": 0.005114255007356405, + "learning_rate": 1.1307393423698555e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9554480, + "step": 19415 + }, + { + "epoch": 2.563019664774977, + "grad_norm": 0.0021275868639349937, + "learning_rate": 1.1302825846618912e-06, + "loss": 0.0, + "num_input_tokens_seen": 9557040, + "step": 19420 + }, + { + "epoch": 2.5636795565527253, + "grad_norm": 0.003015077905729413, + "learning_rate": 1.1298257993023917e-06, + "loss": 0.0352, + "num_input_tokens_seen": 9559600, + "step": 19425 + }, + { + "epoch": 2.5643394483304736, + "grad_norm": 0.00647539459168911, + "learning_rate": 1.1293689863883062e-06, + "loss": 0.0736, + "num_input_tokens_seen": 9562096, + "step": 19430 + }, + { + "epoch": 2.5649993401082223, + "grad_norm": 0.004464747849851847, + "learning_rate": 1.1289121460165907e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9564400, + "step": 19435 + }, + { + "epoch": 2.5656592318859706, + "grad_norm": 0.04154035821557045, + "learning_rate": 1.1284552782842054e-06, + "loss": 0.0911, + "num_input_tokens_seen": 9566768, + "step": 19440 + }, + { + "epoch": 2.5663191236637193, + "grad_norm": 0.014330783858895302, + "learning_rate": 1.1279983832881174e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9569648, + "step": 19445 + }, + { + "epoch": 2.5669790154414676, + "grad_norm": 194.3843231201172, + "learning_rate": 1.1275414611252996e-06, + "loss": 0.0241, + "num_input_tokens_seen": 9572528, + "step": 19450 + }, + { + "epoch": 2.567638907219216, + "grad_norm": 0.028719462454319, + "learning_rate": 1.1270845118927304e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9575152, + "step": 19455 + }, + { + "epoch": 2.5682987989969646, + "grad_norm": 0.027058910578489304, + "learning_rate": 1.1266275356873933e-06, + "loss": 0.0428, + "num_input_tokens_seen": 9577712, + "step": 19460 + }, + { + "epoch": 2.568958690774713, + "grad_norm": 0.001484107575379312, + "learning_rate": 1.1261705326062792e-06, + "loss": 0.001, + "num_input_tokens_seen": 9579952, + "step": 19465 + }, + { + "epoch": 2.5696185825524616, + "grad_norm": 0.010305678471922874, + "learning_rate": 1.1257135027463831e-06, + "loss": 0.1439, + "num_input_tokens_seen": 9582512, + "step": 19470 + }, + { + "epoch": 2.57027847433021, + "grad_norm": 0.09567421674728394, + "learning_rate": 1.1252564462047063e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9584624, + "step": 19475 + }, + { + "epoch": 2.570938366107958, + "grad_norm": 0.04122396185994148, + "learning_rate": 1.124799363078256e-06, + "loss": 0.0707, + "num_input_tokens_seen": 9587056, + "step": 19480 + }, + { + "epoch": 2.571598257885707, + "grad_norm": 0.05378778278827667, + "learning_rate": 1.1243422534640443e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9589232, + "step": 19485 + }, + { + "epoch": 2.572258149663455, + "grad_norm": 0.04564797505736351, + "learning_rate": 1.12388511745909e-06, + "loss": 0.0706, + "num_input_tokens_seen": 9591792, + "step": 19490 + }, + { + "epoch": 2.572918041441204, + "grad_norm": 0.018719684332609177, + "learning_rate": 1.1234279551604164e-06, + "loss": 0.0012, + "num_input_tokens_seen": 9594352, + "step": 19495 + }, + { + "epoch": 2.573577933218952, + "grad_norm": 0.017055541276931763, + "learning_rate": 1.1229707666650531e-06, + "loss": 0.0584, + "num_input_tokens_seen": 9597168, + "step": 19500 + }, + { + "epoch": 2.5742378249967004, + "grad_norm": 0.024933185428380966, + "learning_rate": 1.1225135520700355e-06, + "loss": 0.0311, + "num_input_tokens_seen": 9599728, + "step": 19505 + }, + { + "epoch": 2.574897716774449, + "grad_norm": 0.09410513937473297, + "learning_rate": 1.122056311472403e-06, + "loss": 0.0561, + "num_input_tokens_seen": 9602096, + "step": 19510 + }, + { + "epoch": 2.5755576085521974, + "grad_norm": 0.4654831886291504, + "learning_rate": 1.121599044969203e-06, + "loss": 0.0667, + "num_input_tokens_seen": 9604464, + "step": 19515 + }, + { + "epoch": 2.576217500329946, + "grad_norm": 0.1882486641407013, + "learning_rate": 1.1211417526574858e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9606896, + "step": 19520 + }, + { + "epoch": 2.5768773921076944, + "grad_norm": 3.377927303314209, + "learning_rate": 1.1206844346343089e-06, + "loss": 0.0013, + "num_input_tokens_seen": 9609520, + "step": 19525 + }, + { + "epoch": 2.5775372838854427, + "grad_norm": 0.19885686039924622, + "learning_rate": 1.1202270909967347e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9612336, + "step": 19530 + }, + { + "epoch": 2.5781971756631914, + "grad_norm": 0.0585622675716877, + "learning_rate": 1.119769721841831e-06, + "loss": 0.0861, + "num_input_tokens_seen": 9614320, + "step": 19535 + }, + { + "epoch": 2.5788570674409397, + "grad_norm": 0.6341502666473389, + "learning_rate": 1.119312327266671e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9617136, + "step": 19540 + }, + { + "epoch": 2.5795169592186884, + "grad_norm": 0.005355259403586388, + "learning_rate": 1.1188549073683338e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9619760, + "step": 19545 + }, + { + "epoch": 2.5801768509964367, + "grad_norm": 0.014456301927566528, + "learning_rate": 1.1183974622439032e-06, + "loss": 0.0, + "num_input_tokens_seen": 9622320, + "step": 19550 + }, + { + "epoch": 2.580836742774185, + "grad_norm": 0.03556323051452637, + "learning_rate": 1.1179399919904683e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9624880, + "step": 19555 + }, + { + "epoch": 2.5814966345519332, + "grad_norm": 19.87205696105957, + "learning_rate": 1.1174824967051244e-06, + "loss": 0.0987, + "num_input_tokens_seen": 9627312, + "step": 19560 + }, + { + "epoch": 2.582156526329682, + "grad_norm": 0.03862188756465912, + "learning_rate": 1.117024976484971e-06, + "loss": 0.0, + "num_input_tokens_seen": 9630000, + "step": 19565 + }, + { + "epoch": 2.5828164181074302, + "grad_norm": 14.911026954650879, + "learning_rate": 1.1165674314271142e-06, + "loss": 0.1392, + "num_input_tokens_seen": 9632432, + "step": 19570 + }, + { + "epoch": 2.583476309885179, + "grad_norm": 0.04838300123810768, + "learning_rate": 1.1161098616286641e-06, + "loss": 0.1115, + "num_input_tokens_seen": 9634992, + "step": 19575 + }, + { + "epoch": 2.5841362016629272, + "grad_norm": 0.04838849604129791, + "learning_rate": 1.1156522671867366e-06, + "loss": 0.0023, + "num_input_tokens_seen": 9637296, + "step": 19580 + }, + { + "epoch": 2.5847960934406755, + "grad_norm": 0.026173245161771774, + "learning_rate": 1.1151946481984528e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9639664, + "step": 19585 + }, + { + "epoch": 2.5854559852184242, + "grad_norm": 0.006514672189950943, + "learning_rate": 1.1147370047609391e-06, + "loss": 0.0971, + "num_input_tokens_seen": 9642224, + "step": 19590 + }, + { + "epoch": 2.5861158769961725, + "grad_norm": 121.60194396972656, + "learning_rate": 1.1142793369713273e-06, + "loss": 0.1041, + "num_input_tokens_seen": 9644592, + "step": 19595 + }, + { + "epoch": 2.5867757687739212, + "grad_norm": 0.08616314083337784, + "learning_rate": 1.1138216449267536e-06, + "loss": 0.0555, + "num_input_tokens_seen": 9647152, + "step": 19600 + }, + { + "epoch": 2.5874356605516695, + "grad_norm": 0.2943209707736969, + "learning_rate": 1.11336392872436e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9649584, + "step": 19605 + }, + { + "epoch": 2.588095552329418, + "grad_norm": 0.03955228999257088, + "learning_rate": 1.112906188461293e-06, + "loss": 0.0612, + "num_input_tokens_seen": 9652080, + "step": 19610 + }, + { + "epoch": 2.5887554441071665, + "grad_norm": 0.028087584301829338, + "learning_rate": 1.1124484242347055e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9654448, + "step": 19615 + }, + { + "epoch": 2.589415335884915, + "grad_norm": 0.26087915897369385, + "learning_rate": 1.1119906361417544e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9657008, + "step": 19620 + }, + { + "epoch": 2.5900752276626635, + "grad_norm": 4.318361282348633, + "learning_rate": 1.1115328242796017e-06, + "loss": 0.0618, + "num_input_tokens_seen": 9659376, + "step": 19625 + }, + { + "epoch": 2.590735119440412, + "grad_norm": 0.008139794692397118, + "learning_rate": 1.1110749887454146e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9662064, + "step": 19630 + }, + { + "epoch": 2.59139501121816, + "grad_norm": 0.005774452816694975, + "learning_rate": 1.110617129636365e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9664112, + "step": 19635 + }, + { + "epoch": 2.592054902995909, + "grad_norm": 0.0302497036755085, + "learning_rate": 1.1101592470496315e-06, + "loss": 0.0611, + "num_input_tokens_seen": 9666288, + "step": 19640 + }, + { + "epoch": 2.592714794773657, + "grad_norm": 0.07261912524700165, + "learning_rate": 1.1097013410823952e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9668528, + "step": 19645 + }, + { + "epoch": 2.593374686551406, + "grad_norm": 0.1376306116580963, + "learning_rate": 1.1092434118318435e-06, + "loss": 0.0945, + "num_input_tokens_seen": 9670832, + "step": 19650 + }, + { + "epoch": 2.594034578329154, + "grad_norm": 0.09144063293933868, + "learning_rate": 1.1087854593951688e-06, + "loss": 0.0826, + "num_input_tokens_seen": 9673008, + "step": 19655 + }, + { + "epoch": 2.5946944701069024, + "grad_norm": 14.743500709533691, + "learning_rate": 1.108327483869568e-06, + "loss": 0.2127, + "num_input_tokens_seen": 9675568, + "step": 19660 + }, + { + "epoch": 2.595354361884651, + "grad_norm": 0.10582589358091354, + "learning_rate": 1.1078694853522435e-06, + "loss": 0.0008, + "num_input_tokens_seen": 9678192, + "step": 19665 + }, + { + "epoch": 2.5960142536623994, + "grad_norm": 0.0031384157482534647, + "learning_rate": 1.1074114639404015e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9680624, + "step": 19670 + }, + { + "epoch": 2.596674145440148, + "grad_norm": 0.17820972204208374, + "learning_rate": 1.1069534197312544e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9683056, + "step": 19675 + }, + { + "epoch": 2.5973340372178964, + "grad_norm": 12.603302001953125, + "learning_rate": 1.1064953528220181e-06, + "loss": 0.091, + "num_input_tokens_seen": 9685616, + "step": 19680 + }, + { + "epoch": 2.5979939289956446, + "grad_norm": 0.018135545775294304, + "learning_rate": 1.1060372633099146e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9688368, + "step": 19685 + }, + { + "epoch": 2.598653820773393, + "grad_norm": 0.07453392446041107, + "learning_rate": 1.10557915129217e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9690672, + "step": 19690 + }, + { + "epoch": 2.5993137125511416, + "grad_norm": 0.027635499835014343, + "learning_rate": 1.1051210168660146e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9692784, + "step": 19695 + }, + { + "epoch": 2.59997360432889, + "grad_norm": 12.794387817382812, + "learning_rate": 1.1046628601286852e-06, + "loss": 0.0424, + "num_input_tokens_seen": 9695216, + "step": 19700 + }, + { + "epoch": 2.6006334961066386, + "grad_norm": 14.300383567810059, + "learning_rate": 1.1042046811774213e-06, + "loss": 0.0569, + "num_input_tokens_seen": 9697456, + "step": 19705 + }, + { + "epoch": 2.601293387884387, + "grad_norm": 0.02011864259839058, + "learning_rate": 1.1037464801094684e-06, + "loss": 0.0568, + "num_input_tokens_seen": 9699760, + "step": 19710 + }, + { + "epoch": 2.601953279662135, + "grad_norm": 0.663529634475708, + "learning_rate": 1.1032882570220764e-06, + "loss": 0.0536, + "num_input_tokens_seen": 9702128, + "step": 19715 + }, + { + "epoch": 2.602613171439884, + "grad_norm": 0.014893081970512867, + "learning_rate": 1.1028300120124997e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9705008, + "step": 19720 + }, + { + "epoch": 2.603273063217632, + "grad_norm": 0.02787351794540882, + "learning_rate": 1.1023717451779977e-06, + "loss": 0.0, + "num_input_tokens_seen": 9707312, + "step": 19725 + }, + { + "epoch": 2.603932954995381, + "grad_norm": 0.015048121102154255, + "learning_rate": 1.1019134566158341e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9709936, + "step": 19730 + }, + { + "epoch": 2.604592846773129, + "grad_norm": 0.007719477638602257, + "learning_rate": 1.1014551464232773e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9712304, + "step": 19735 + }, + { + "epoch": 2.6052527385508775, + "grad_norm": 21.70554542541504, + "learning_rate": 1.1009968146976003e-06, + "loss": 0.0626, + "num_input_tokens_seen": 9714736, + "step": 19740 + }, + { + "epoch": 2.605912630328626, + "grad_norm": 0.0036755255423486233, + "learning_rate": 1.100538461536081e-06, + "loss": 0.1001, + "num_input_tokens_seen": 9717360, + "step": 19745 + }, + { + "epoch": 2.6065725221063745, + "grad_norm": 81.93183898925781, + "learning_rate": 1.1000800870360012e-06, + "loss": 0.0181, + "num_input_tokens_seen": 9719984, + "step": 19750 + }, + { + "epoch": 2.607232413884123, + "grad_norm": 25.536109924316406, + "learning_rate": 1.0996216912946472e-06, + "loss": 0.0023, + "num_input_tokens_seen": 9722352, + "step": 19755 + }, + { + "epoch": 2.6078923056618715, + "grad_norm": 0.0018935500411316752, + "learning_rate": 1.099163274409311e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9724848, + "step": 19760 + }, + { + "epoch": 2.6085521974396197, + "grad_norm": 42.559783935546875, + "learning_rate": 1.098704836477288e-06, + "loss": 0.1899, + "num_input_tokens_seen": 9727472, + "step": 19765 + }, + { + "epoch": 2.6092120892173685, + "grad_norm": 0.014334982261061668, + "learning_rate": 1.098246377595878e-06, + "loss": 0.0, + "num_input_tokens_seen": 9729840, + "step": 19770 + }, + { + "epoch": 2.6098719809951167, + "grad_norm": 0.6818047761917114, + "learning_rate": 1.097787897862386e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9732592, + "step": 19775 + }, + { + "epoch": 2.6105318727728655, + "grad_norm": 0.013156001456081867, + "learning_rate": 1.097329397374121e-06, + "loss": 0.1143, + "num_input_tokens_seen": 9734960, + "step": 19780 + }, + { + "epoch": 2.6111917645506137, + "grad_norm": 0.11573667824268341, + "learning_rate": 1.0968708762283955e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9737328, + "step": 19785 + }, + { + "epoch": 2.611851656328362, + "grad_norm": 30.16448211669922, + "learning_rate": 1.0964123345225285e-06, + "loss": 0.1288, + "num_input_tokens_seen": 9739888, + "step": 19790 + }, + { + "epoch": 2.6125115481061107, + "grad_norm": 0.019509224221110344, + "learning_rate": 1.0959537723538414e-06, + "loss": 0.1315, + "num_input_tokens_seen": 9742576, + "step": 19795 + }, + { + "epoch": 2.613171439883859, + "grad_norm": 0.02313673309981823, + "learning_rate": 1.0954951898196614e-06, + "loss": 0.014, + "num_input_tokens_seen": 9745264, + "step": 19800 + }, + { + "epoch": 2.6138313316616077, + "grad_norm": 20.099658966064453, + "learning_rate": 1.0950365870173186e-06, + "loss": 0.1164, + "num_input_tokens_seen": 9747568, + "step": 19805 + }, + { + "epoch": 2.614491223439356, + "grad_norm": 0.01467086561024189, + "learning_rate": 1.0945779640441484e-06, + "loss": 0.0801, + "num_input_tokens_seen": 9750192, + "step": 19810 + }, + { + "epoch": 2.6151511152171043, + "grad_norm": 0.027691079303622246, + "learning_rate": 1.0941193209974902e-06, + "loss": 0.0024, + "num_input_tokens_seen": 9752752, + "step": 19815 + }, + { + "epoch": 2.6158110069948526, + "grad_norm": 0.1043350100517273, + "learning_rate": 1.0936606579746877e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9755504, + "step": 19820 + }, + { + "epoch": 2.6164708987726013, + "grad_norm": 39.8645133972168, + "learning_rate": 1.0932019750730888e-06, + "loss": 0.0253, + "num_input_tokens_seen": 9757936, + "step": 19825 + }, + { + "epoch": 2.6171307905503496, + "grad_norm": 0.016320737078785896, + "learning_rate": 1.0927432723900455e-06, + "loss": 0.0, + "num_input_tokens_seen": 9760368, + "step": 19830 + }, + { + "epoch": 2.6177906823280983, + "grad_norm": 0.1097755879163742, + "learning_rate": 1.0922845500229143e-06, + "loss": 0.0281, + "num_input_tokens_seen": 9762672, + "step": 19835 + }, + { + "epoch": 2.6184505741058466, + "grad_norm": 0.01819690689444542, + "learning_rate": 1.0918258080690557e-06, + "loss": 0.0326, + "num_input_tokens_seen": 9765040, + "step": 19840 + }, + { + "epoch": 2.619110465883595, + "grad_norm": 0.027937527745962143, + "learning_rate": 1.0913670466258343e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9767536, + "step": 19845 + }, + { + "epoch": 2.6197703576613436, + "grad_norm": 0.02201470360159874, + "learning_rate": 1.090908265790619e-06, + "loss": 0.0271, + "num_input_tokens_seen": 9770032, + "step": 19850 + }, + { + "epoch": 2.620430249439092, + "grad_norm": 34.935428619384766, + "learning_rate": 1.0904494656607824e-06, + "loss": 0.0798, + "num_input_tokens_seen": 9772656, + "step": 19855 + }, + { + "epoch": 2.6210901412168406, + "grad_norm": 18.851734161376953, + "learning_rate": 1.0899906463337016e-06, + "loss": 0.0459, + "num_input_tokens_seen": 9775408, + "step": 19860 + }, + { + "epoch": 2.621750032994589, + "grad_norm": 0.2432224303483963, + "learning_rate": 1.0895318079067576e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9777712, + "step": 19865 + }, + { + "epoch": 2.622409924772337, + "grad_norm": 0.2236015647649765, + "learning_rate": 1.0890729504773359e-06, + "loss": 0.1895, + "num_input_tokens_seen": 9780144, + "step": 19870 + }, + { + "epoch": 2.623069816550086, + "grad_norm": 0.05861254781484604, + "learning_rate": 1.0886140741428257e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9782960, + "step": 19875 + }, + { + "epoch": 2.623729708327834, + "grad_norm": 0.005046785343438387, + "learning_rate": 1.08815517900062e-06, + "loss": 0.0887, + "num_input_tokens_seen": 9785136, + "step": 19880 + }, + { + "epoch": 2.624389600105583, + "grad_norm": 0.01770644262433052, + "learning_rate": 1.0876962651481159e-06, + "loss": 0.0035, + "num_input_tokens_seen": 9787696, + "step": 19885 + }, + { + "epoch": 2.625049491883331, + "grad_norm": 0.008962863124907017, + "learning_rate": 1.0872373326827143e-06, + "loss": 0.0009, + "num_input_tokens_seen": 9790192, + "step": 19890 + }, + { + "epoch": 2.6257093836610794, + "grad_norm": 0.0033157370053231716, + "learning_rate": 1.0867783817018207e-06, + "loss": 0.0338, + "num_input_tokens_seen": 9792752, + "step": 19895 + }, + { + "epoch": 2.626369275438828, + "grad_norm": 28.182462692260742, + "learning_rate": 1.086319412302844e-06, + "loss": 0.1192, + "num_input_tokens_seen": 9795376, + "step": 19900 + }, + { + "epoch": 2.6270291672165764, + "grad_norm": 0.005671774502843618, + "learning_rate": 1.085860424583197e-06, + "loss": 0.0018, + "num_input_tokens_seen": 9797872, + "step": 19905 + }, + { + "epoch": 2.627689058994325, + "grad_norm": 0.11161317676305771, + "learning_rate": 1.0854014186402968e-06, + "loss": 0.0488, + "num_input_tokens_seen": 9800432, + "step": 19910 + }, + { + "epoch": 2.6283489507720734, + "grad_norm": 0.004430611617863178, + "learning_rate": 1.0849423945715637e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9802992, + "step": 19915 + }, + { + "epoch": 2.6290088425498217, + "grad_norm": 0.12796758115291595, + "learning_rate": 1.0844833524744226e-06, + "loss": 0.0881, + "num_input_tokens_seen": 9805424, + "step": 19920 + }, + { + "epoch": 2.6296687343275704, + "grad_norm": 0.013856647536158562, + "learning_rate": 1.0840242924463016e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9808048, + "step": 19925 + }, + { + "epoch": 2.6303286261053187, + "grad_norm": 0.07488207519054413, + "learning_rate": 1.0835652145846335e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9810608, + "step": 19930 + }, + { + "epoch": 2.6309885178830674, + "grad_norm": 24.79937744140625, + "learning_rate": 1.0831061189868531e-06, + "loss": 0.1537, + "num_input_tokens_seen": 9812848, + "step": 19935 + }, + { + "epoch": 2.6316484096608157, + "grad_norm": 20.282337188720703, + "learning_rate": 1.0826470057504008e-06, + "loss": 0.1011, + "num_input_tokens_seen": 9815088, + "step": 19940 + }, + { + "epoch": 2.632308301438564, + "grad_norm": 0.022686759009957314, + "learning_rate": 1.0821878749727204e-06, + "loss": 0.0767, + "num_input_tokens_seen": 9817520, + "step": 19945 + }, + { + "epoch": 2.6329681932163123, + "grad_norm": 0.07471811026334763, + "learning_rate": 1.0817287267512583e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9820080, + "step": 19950 + }, + { + "epoch": 2.633628084994061, + "grad_norm": 0.13609452545642853, + "learning_rate": 1.0812695611834664e-06, + "loss": 0.1129, + "num_input_tokens_seen": 9822320, + "step": 19955 + }, + { + "epoch": 2.6342879767718093, + "grad_norm": 0.20238642394542694, + "learning_rate": 1.0808103783667981e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9824432, + "step": 19960 + }, + { + "epoch": 2.634947868549558, + "grad_norm": 0.023685157299041748, + "learning_rate": 1.0803511783987122e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9826992, + "step": 19965 + }, + { + "epoch": 2.6356077603273063, + "grad_norm": 0.08244713395833969, + "learning_rate": 1.0798919613766707e-06, + "loss": 0.0016, + "num_input_tokens_seen": 9829424, + "step": 19970 + }, + { + "epoch": 2.6362676521050545, + "grad_norm": 24.1247501373291, + "learning_rate": 1.079432727398139e-06, + "loss": 0.1057, + "num_input_tokens_seen": 9832048, + "step": 19975 + }, + { + "epoch": 2.6369275438828033, + "grad_norm": 0.03990231081843376, + "learning_rate": 1.078973476560586e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9834352, + "step": 19980 + }, + { + "epoch": 2.6375874356605515, + "grad_norm": 0.023310460150241852, + "learning_rate": 1.0785142089614843e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9836784, + "step": 19985 + }, + { + "epoch": 2.6382473274383003, + "grad_norm": 0.03536645323038101, + "learning_rate": 1.0780549246983105e-06, + "loss": 0.0017, + "num_input_tokens_seen": 9839152, + "step": 19990 + }, + { + "epoch": 2.6389072192160485, + "grad_norm": 0.6347410082817078, + "learning_rate": 1.077595623868544e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9841712, + "step": 19995 + }, + { + "epoch": 2.639567110993797, + "grad_norm": 0.0031490796245634556, + "learning_rate": 1.0771363065696684e-06, + "loss": 0.1022, + "num_input_tokens_seen": 9844144, + "step": 20000 + }, + { + "epoch": 2.6402270027715455, + "grad_norm": 0.06542643159627914, + "learning_rate": 1.0766769728991705e-06, + "loss": 0.0979, + "num_input_tokens_seen": 9846512, + "step": 20005 + }, + { + "epoch": 2.640886894549294, + "grad_norm": 0.1036391630768776, + "learning_rate": 1.0762176229545398e-06, + "loss": 0.0752, + "num_input_tokens_seen": 9849136, + "step": 20010 + }, + { + "epoch": 2.6415467863270425, + "grad_norm": 0.3355109691619873, + "learning_rate": 1.0757582568332711e-06, + "loss": 0.0312, + "num_input_tokens_seen": 9851504, + "step": 20015 + }, + { + "epoch": 2.642206678104791, + "grad_norm": 0.05988360196352005, + "learning_rate": 1.0752988746328607e-06, + "loss": 0.0831, + "num_input_tokens_seen": 9853872, + "step": 20020 + }, + { + "epoch": 2.642866569882539, + "grad_norm": 0.08277001231908798, + "learning_rate": 1.0748394764508095e-06, + "loss": 0.0416, + "num_input_tokens_seen": 9856240, + "step": 20025 + }, + { + "epoch": 2.643526461660288, + "grad_norm": 0.2739223837852478, + "learning_rate": 1.0743800623846213e-06, + "loss": 0.0012, + "num_input_tokens_seen": 9858480, + "step": 20030 + }, + { + "epoch": 2.644186353438036, + "grad_norm": 0.025710877031087875, + "learning_rate": 1.0739206325318038e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9860784, + "step": 20035 + }, + { + "epoch": 2.644846245215785, + "grad_norm": 0.0025923114735633135, + "learning_rate": 1.0734611869898668e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9863408, + "step": 20040 + }, + { + "epoch": 2.645506136993533, + "grad_norm": 0.05023697018623352, + "learning_rate": 1.0730017258563253e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9865840, + "step": 20045 + }, + { + "epoch": 2.6461660287712814, + "grad_norm": 0.00368337519466877, + "learning_rate": 1.0725422492286957e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9868208, + "step": 20050 + }, + { + "epoch": 2.64682592054903, + "grad_norm": 0.0010652344208210707, + "learning_rate": 1.0720827572044995e-06, + "loss": 0.0818, + "num_input_tokens_seen": 9870320, + "step": 20055 + }, + { + "epoch": 2.6474858123267784, + "grad_norm": 0.0436105877161026, + "learning_rate": 1.0716232498812598e-06, + "loss": 0.0457, + "num_input_tokens_seen": 9872752, + "step": 20060 + }, + { + "epoch": 2.648145704104527, + "grad_norm": 0.10467518121004105, + "learning_rate": 1.0711637273565037e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9875376, + "step": 20065 + }, + { + "epoch": 2.6488055958822754, + "grad_norm": 0.7983324527740479, + "learning_rate": 1.0707041897277623e-06, + "loss": 0.002, + "num_input_tokens_seen": 9877680, + "step": 20070 + }, + { + "epoch": 2.6494654876600237, + "grad_norm": 0.0028939221519976854, + "learning_rate": 1.0702446370925682e-06, + "loss": 0.1485, + "num_input_tokens_seen": 9880176, + "step": 20075 + }, + { + "epoch": 2.650125379437772, + "grad_norm": 0.017617272213101387, + "learning_rate": 1.069785069548459e-06, + "loss": 0.0, + "num_input_tokens_seen": 9882672, + "step": 20080 + }, + { + "epoch": 2.6507852712155207, + "grad_norm": 0.053677234798669815, + "learning_rate": 1.0693254871929737e-06, + "loss": 0.0884, + "num_input_tokens_seen": 9885168, + "step": 20085 + }, + { + "epoch": 2.6514451629932694, + "grad_norm": 14.307182312011719, + "learning_rate": 1.068865890123656e-06, + "loss": 0.0369, + "num_input_tokens_seen": 9887728, + "step": 20090 + }, + { + "epoch": 2.6521050547710177, + "grad_norm": 146.76573181152344, + "learning_rate": 1.068406278438052e-06, + "loss": 0.0975, + "num_input_tokens_seen": 9889904, + "step": 20095 + }, + { + "epoch": 2.652764946548766, + "grad_norm": 0.7040823101997375, + "learning_rate": 1.0679466522337102e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9892272, + "step": 20100 + }, + { + "epoch": 2.653424838326514, + "grad_norm": 0.13565072417259216, + "learning_rate": 1.0674870116081838e-06, + "loss": 0.0012, + "num_input_tokens_seen": 9894832, + "step": 20105 + }, + { + "epoch": 2.654084730104263, + "grad_norm": 1.549350619316101, + "learning_rate": 1.067027356659028e-06, + "loss": 0.0145, + "num_input_tokens_seen": 9897520, + "step": 20110 + }, + { + "epoch": 2.654744621882011, + "grad_norm": 0.023634739220142365, + "learning_rate": 1.066567687483801e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9899696, + "step": 20115 + }, + { + "epoch": 2.65540451365976, + "grad_norm": 1.0513437986373901, + "learning_rate": 1.0661080041800642e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9902448, + "step": 20120 + }, + { + "epoch": 2.656064405437508, + "grad_norm": 0.01823163591325283, + "learning_rate": 1.0656483068453828e-06, + "loss": 0.0975, + "num_input_tokens_seen": 9904880, + "step": 20125 + }, + { + "epoch": 2.6567242972152565, + "grad_norm": 16.00564956665039, + "learning_rate": 1.065188595577323e-06, + "loss": 0.0077, + "num_input_tokens_seen": 9907376, + "step": 20130 + }, + { + "epoch": 2.657384188993005, + "grad_norm": 0.001400611363351345, + "learning_rate": 1.0647288704734563e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9910000, + "step": 20135 + }, + { + "epoch": 2.6580440807707535, + "grad_norm": 54.8060188293457, + "learning_rate": 1.0642691316313556e-06, + "loss": 0.0893, + "num_input_tokens_seen": 9912368, + "step": 20140 + }, + { + "epoch": 2.658703972548502, + "grad_norm": 38.158226013183594, + "learning_rate": 1.0638093791485964e-06, + "loss": 0.057, + "num_input_tokens_seen": 9914672, + "step": 20145 + }, + { + "epoch": 2.6593638643262505, + "grad_norm": 0.7230840921401978, + "learning_rate": 1.0633496131227593e-06, + "loss": 0.0003, + "num_input_tokens_seen": 9917104, + "step": 20150 + }, + { + "epoch": 2.6600237561039988, + "grad_norm": 0.07111620157957077, + "learning_rate": 1.0628898336514252e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9919536, + "step": 20155 + }, + { + "epoch": 2.6606836478817475, + "grad_norm": 0.0019288670737296343, + "learning_rate": 1.0624300408321795e-06, + "loss": 0.0844, + "num_input_tokens_seen": 9921712, + "step": 20160 + }, + { + "epoch": 2.6613435396594958, + "grad_norm": 0.14772047102451324, + "learning_rate": 1.0619702347626098e-06, + "loss": 0.0596, + "num_input_tokens_seen": 9924144, + "step": 20165 + }, + { + "epoch": 2.6620034314372445, + "grad_norm": 0.6238264441490173, + "learning_rate": 1.0615104155403063e-06, + "loss": 0.0002, + "num_input_tokens_seen": 9926640, + "step": 20170 + }, + { + "epoch": 2.6626633232149928, + "grad_norm": 0.013693057000637054, + "learning_rate": 1.0610505832628626e-06, + "loss": 0.1212, + "num_input_tokens_seen": 9929072, + "step": 20175 + }, + { + "epoch": 2.663323214992741, + "grad_norm": 17.43290138244629, + "learning_rate": 1.0605907380278745e-06, + "loss": 0.0673, + "num_input_tokens_seen": 9931376, + "step": 20180 + }, + { + "epoch": 2.6639831067704898, + "grad_norm": 0.027616208419203758, + "learning_rate": 1.0601308799329413e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9933552, + "step": 20185 + }, + { + "epoch": 2.664642998548238, + "grad_norm": 40.22153854370117, + "learning_rate": 1.0596710090756641e-06, + "loss": 0.1238, + "num_input_tokens_seen": 9935728, + "step": 20190 + }, + { + "epoch": 2.6653028903259868, + "grad_norm": 0.2736246883869171, + "learning_rate": 1.0592111255536478e-06, + "loss": 0.0167, + "num_input_tokens_seen": 9937968, + "step": 20195 + }, + { + "epoch": 2.665962782103735, + "grad_norm": 63.869476318359375, + "learning_rate": 1.0587512294644982e-06, + "loss": 0.1255, + "num_input_tokens_seen": 9940272, + "step": 20200 + }, + { + "epoch": 2.6666226738814833, + "grad_norm": 15.267470359802246, + "learning_rate": 1.0582913209058257e-06, + "loss": 0.0168, + "num_input_tokens_seen": 9942768, + "step": 20205 + }, + { + "epoch": 2.667282565659232, + "grad_norm": 152.2574462890625, + "learning_rate": 1.0578313999752427e-06, + "loss": 0.2539, + "num_input_tokens_seen": 9945456, + "step": 20210 + }, + { + "epoch": 2.6679424574369803, + "grad_norm": 0.024640629068017006, + "learning_rate": 1.0573714667703638e-06, + "loss": 0.001, + "num_input_tokens_seen": 9948144, + "step": 20215 + }, + { + "epoch": 2.668602349214729, + "grad_norm": 0.020223217085003853, + "learning_rate": 1.0569115213888067e-06, + "loss": 0.0813, + "num_input_tokens_seen": 9950832, + "step": 20220 + }, + { + "epoch": 2.6692622409924773, + "grad_norm": 14.030696868896484, + "learning_rate": 1.0564515639281911e-06, + "loss": 0.1883, + "num_input_tokens_seen": 9953392, + "step": 20225 + }, + { + "epoch": 2.6699221327702256, + "grad_norm": 0.02479293756186962, + "learning_rate": 1.0559915944861397e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9956016, + "step": 20230 + }, + { + "epoch": 2.670582024547974, + "grad_norm": 0.21013309061527252, + "learning_rate": 1.0555316131602778e-06, + "loss": 0.0942, + "num_input_tokens_seen": 9958512, + "step": 20235 + }, + { + "epoch": 2.6712419163257226, + "grad_norm": 0.3462769389152527, + "learning_rate": 1.0550716200482335e-06, + "loss": 0.0016, + "num_input_tokens_seen": 9961008, + "step": 20240 + }, + { + "epoch": 2.671901808103471, + "grad_norm": 0.026164565235376358, + "learning_rate": 1.0546116152476366e-06, + "loss": 0.0428, + "num_input_tokens_seen": 9963568, + "step": 20245 + }, + { + "epoch": 2.6725616998812196, + "grad_norm": 0.0984434112906456, + "learning_rate": 1.0541515988561195e-06, + "loss": 0.1266, + "num_input_tokens_seen": 9965808, + "step": 20250 + }, + { + "epoch": 2.673221591658968, + "grad_norm": 0.08318020403385162, + "learning_rate": 1.053691570971318e-06, + "loss": 0.0373, + "num_input_tokens_seen": 9968304, + "step": 20255 + }, + { + "epoch": 2.673881483436716, + "grad_norm": 0.2922511398792267, + "learning_rate": 1.0532315316908691e-06, + "loss": 0.0505, + "num_input_tokens_seen": 9970608, + "step": 20260 + }, + { + "epoch": 2.674541375214465, + "grad_norm": 0.10788524895906448, + "learning_rate": 1.0527714811124132e-06, + "loss": 0.0933, + "num_input_tokens_seen": 9972976, + "step": 20265 + }, + { + "epoch": 2.675201266992213, + "grad_norm": 0.3631938695907593, + "learning_rate": 1.0523114193335926e-06, + "loss": 0.0012, + "num_input_tokens_seen": 9975472, + "step": 20270 + }, + { + "epoch": 2.675861158769962, + "grad_norm": 0.060309797525405884, + "learning_rate": 1.051851346452052e-06, + "loss": 0.1191, + "num_input_tokens_seen": 9977840, + "step": 20275 + }, + { + "epoch": 2.67652105054771, + "grad_norm": 0.09282801300287247, + "learning_rate": 1.0513912625654386e-06, + "loss": 0.0011, + "num_input_tokens_seen": 9980080, + "step": 20280 + }, + { + "epoch": 2.6771809423254584, + "grad_norm": 0.009397115558385849, + "learning_rate": 1.0509311677714016e-06, + "loss": 0.0008, + "num_input_tokens_seen": 9982384, + "step": 20285 + }, + { + "epoch": 2.677840834103207, + "grad_norm": 0.13344921171665192, + "learning_rate": 1.050471062167594e-06, + "loss": 0.0005, + "num_input_tokens_seen": 9985136, + "step": 20290 + }, + { + "epoch": 2.6785007258809554, + "grad_norm": 0.11381205171346664, + "learning_rate": 1.050010945851668e-06, + "loss": 0.1043, + "num_input_tokens_seen": 9987760, + "step": 20295 + }, + { + "epoch": 2.679160617658704, + "grad_norm": 0.0090647516772151, + "learning_rate": 1.049550818921281e-06, + "loss": 0.0001, + "num_input_tokens_seen": 9990320, + "step": 20300 + }, + { + "epoch": 2.6798205094364524, + "grad_norm": 0.016323421150445938, + "learning_rate": 1.0490906814740916e-06, + "loss": 0.0004, + "num_input_tokens_seen": 9992816, + "step": 20305 + }, + { + "epoch": 2.6804804012142007, + "grad_norm": 0.1127217710018158, + "learning_rate": 1.0486305336077609e-06, + "loss": 0.0006, + "num_input_tokens_seen": 9995120, + "step": 20310 + }, + { + "epoch": 2.6811402929919494, + "grad_norm": 25.71272850036621, + "learning_rate": 1.0481703754199513e-06, + "loss": 0.1659, + "num_input_tokens_seen": 9997488, + "step": 20315 + }, + { + "epoch": 2.6818001847696977, + "grad_norm": 0.004593730438500643, + "learning_rate": 1.047710207008328e-06, + "loss": 0.0007, + "num_input_tokens_seen": 9999920, + "step": 20320 + }, + { + "epoch": 2.6824600765474464, + "grad_norm": 43.304931640625, + "learning_rate": 1.0472500284705595e-06, + "loss": 0.1558, + "num_input_tokens_seen": 10002352, + "step": 20325 + }, + { + "epoch": 2.6831199683251947, + "grad_norm": 0.028853746131062508, + "learning_rate": 1.046789839904314e-06, + "loss": 0.0008, + "num_input_tokens_seen": 10004592, + "step": 20330 + }, + { + "epoch": 2.683779860102943, + "grad_norm": 0.006348696071654558, + "learning_rate": 1.0463296414072641e-06, + "loss": 0.038, + "num_input_tokens_seen": 10007024, + "step": 20335 + }, + { + "epoch": 2.6844397518806917, + "grad_norm": 0.03202318400144577, + "learning_rate": 1.0458694330770832e-06, + "loss": 0.0877, + "num_input_tokens_seen": 10009712, + "step": 20340 + }, + { + "epoch": 2.68509964365844, + "grad_norm": 0.05653927102684975, + "learning_rate": 1.0454092150114473e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10012400, + "step": 20345 + }, + { + "epoch": 2.6857595354361887, + "grad_norm": 0.02157328464090824, + "learning_rate": 1.0449489873080344e-06, + "loss": 0.0013, + "num_input_tokens_seen": 10014640, + "step": 20350 + }, + { + "epoch": 2.686419427213937, + "grad_norm": 0.020717937499284744, + "learning_rate": 1.0444887500645244e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10017200, + "step": 20355 + }, + { + "epoch": 2.6870793189916853, + "grad_norm": 0.23628821969032288, + "learning_rate": 1.0440285033785994e-06, + "loss": 0.0006, + "num_input_tokens_seen": 10019888, + "step": 20360 + }, + { + "epoch": 2.6877392107694336, + "grad_norm": 0.04174189642071724, + "learning_rate": 1.0435682473479433e-06, + "loss": 0.0873, + "num_input_tokens_seen": 10022064, + "step": 20365 + }, + { + "epoch": 2.6883991025471823, + "grad_norm": 0.04619302600622177, + "learning_rate": 1.0431079820702425e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10024496, + "step": 20370 + }, + { + "epoch": 2.6890589943249306, + "grad_norm": 0.17279821634292603, + "learning_rate": 1.042647707643184e-06, + "loss": 0.0003, + "num_input_tokens_seen": 10027056, + "step": 20375 + }, + { + "epoch": 2.6897188861026793, + "grad_norm": 24.22002601623535, + "learning_rate": 1.0421874241644591e-06, + "loss": 0.0535, + "num_input_tokens_seen": 10029616, + "step": 20380 + }, + { + "epoch": 2.6903787778804276, + "grad_norm": 0.02293427661061287, + "learning_rate": 1.0417271317317585e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10032304, + "step": 20385 + }, + { + "epoch": 2.691038669658176, + "grad_norm": 0.008679247461259365, + "learning_rate": 1.0412668304427766e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10034800, + "step": 20390 + }, + { + "epoch": 2.6916985614359246, + "grad_norm": 0.5604121685028076, + "learning_rate": 1.0408065203952086e-06, + "loss": 0.0005, + "num_input_tokens_seen": 10037424, + "step": 20395 + }, + { + "epoch": 2.692358453213673, + "grad_norm": 0.049470458179712296, + "learning_rate": 1.040346201686752e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10039984, + "step": 20400 + }, + { + "epoch": 2.6930183449914216, + "grad_norm": 0.003851136425510049, + "learning_rate": 1.0398858744151067e-06, + "loss": 0.0596, + "num_input_tokens_seen": 10042672, + "step": 20405 + }, + { + "epoch": 2.69367823676917, + "grad_norm": 0.01271560974419117, + "learning_rate": 1.0394255386779728e-06, + "loss": 0.0004, + "num_input_tokens_seen": 10044912, + "step": 20410 + }, + { + "epoch": 2.694338128546918, + "grad_norm": 0.20090311765670776, + "learning_rate": 1.0389651945730545e-06, + "loss": 0.0016, + "num_input_tokens_seen": 10047216, + "step": 20415 + }, + { + "epoch": 2.694998020324667, + "grad_norm": 0.0027731643058359623, + "learning_rate": 1.0385048421980554e-06, + "loss": 0.1896, + "num_input_tokens_seen": 10049648, + "step": 20420 + }, + { + "epoch": 2.695657912102415, + "grad_norm": 0.42517948150634766, + "learning_rate": 1.0380444816506822e-06, + "loss": 0.0583, + "num_input_tokens_seen": 10052208, + "step": 20425 + }, + { + "epoch": 2.696317803880164, + "grad_norm": 0.04374484345316887, + "learning_rate": 1.0375841130286436e-06, + "loss": 0.0242, + "num_input_tokens_seen": 10054640, + "step": 20430 + }, + { + "epoch": 2.696977695657912, + "grad_norm": 0.02089790813624859, + "learning_rate": 1.0371237364296491e-06, + "loss": 0.1204, + "num_input_tokens_seen": 10057072, + "step": 20435 + }, + { + "epoch": 2.6976375874356604, + "grad_norm": 1.9048203229904175, + "learning_rate": 1.0366633519514104e-06, + "loss": 0.0581, + "num_input_tokens_seen": 10059376, + "step": 20440 + }, + { + "epoch": 2.698297479213409, + "grad_norm": 0.02070975862443447, + "learning_rate": 1.0362029596916407e-06, + "loss": 0.0596, + "num_input_tokens_seen": 10061936, + "step": 20445 + }, + { + "epoch": 2.6989573709911574, + "grad_norm": 0.12654846906661987, + "learning_rate": 1.0357425597480548e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10064240, + "step": 20450 + }, + { + "epoch": 2.699617262768906, + "grad_norm": 133.1804656982422, + "learning_rate": 1.0352821522183697e-06, + "loss": 0.0458, + "num_input_tokens_seen": 10066608, + "step": 20455 + }, + { + "epoch": 2.7002771545466544, + "grad_norm": 0.2598607838153839, + "learning_rate": 1.0348217372003032e-06, + "loss": 0.0203, + "num_input_tokens_seen": 10068848, + "step": 20460 + }, + { + "epoch": 2.7009370463244027, + "grad_norm": 0.03957448527216911, + "learning_rate": 1.0343613147915748e-06, + "loss": 0.0227, + "num_input_tokens_seen": 10071152, + "step": 20465 + }, + { + "epoch": 2.7015969381021514, + "grad_norm": 0.036427486687898636, + "learning_rate": 1.0339008850899067e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10073712, + "step": 20470 + }, + { + "epoch": 2.7022568298798997, + "grad_norm": 0.44466233253479004, + "learning_rate": 1.033440448193021e-06, + "loss": 0.0004, + "num_input_tokens_seen": 10076272, + "step": 20475 + }, + { + "epoch": 2.7029167216576484, + "grad_norm": 0.05536358058452606, + "learning_rate": 1.0329800041986423e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10078448, + "step": 20480 + }, + { + "epoch": 2.7035766134353967, + "grad_norm": 0.2589349150657654, + "learning_rate": 1.0325195532044966e-06, + "loss": 0.0009, + "num_input_tokens_seen": 10081008, + "step": 20485 + }, + { + "epoch": 2.704236505213145, + "grad_norm": 0.014972384087741375, + "learning_rate": 1.032059095308311e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10083312, + "step": 20490 + }, + { + "epoch": 2.7048963969908932, + "grad_norm": 0.010684339329600334, + "learning_rate": 1.0315986306078149e-06, + "loss": 0.0, + "num_input_tokens_seen": 10086192, + "step": 20495 + }, + { + "epoch": 2.705556288768642, + "grad_norm": 0.005539987236261368, + "learning_rate": 1.031138159200738e-06, + "loss": 0.097, + "num_input_tokens_seen": 10088432, + "step": 20500 + }, + { + "epoch": 2.7062161805463902, + "grad_norm": 0.04544892534613609, + "learning_rate": 1.0306776811848124e-06, + "loss": 0.0813, + "num_input_tokens_seen": 10091056, + "step": 20505 + }, + { + "epoch": 2.706876072324139, + "grad_norm": 0.05907962843775749, + "learning_rate": 1.030217196657771e-06, + "loss": 0.0367, + "num_input_tokens_seen": 10093552, + "step": 20510 + }, + { + "epoch": 2.7075359641018872, + "grad_norm": 82.98628997802734, + "learning_rate": 1.0297567057173486e-06, + "loss": 0.0882, + "num_input_tokens_seen": 10096048, + "step": 20515 + }, + { + "epoch": 2.7081958558796355, + "grad_norm": 0.7463860511779785, + "learning_rate": 1.0292962084612808e-06, + "loss": 0.0012, + "num_input_tokens_seen": 10098480, + "step": 20520 + }, + { + "epoch": 2.7088557476573842, + "grad_norm": 0.016940882429480553, + "learning_rate": 1.0288357049873051e-06, + "loss": 0.0012, + "num_input_tokens_seen": 10101360, + "step": 20525 + }, + { + "epoch": 2.7095156394351325, + "grad_norm": 18.495149612426758, + "learning_rate": 1.0283751953931595e-06, + "loss": 0.0799, + "num_input_tokens_seen": 10103856, + "step": 20530 + }, + { + "epoch": 2.7101755312128812, + "grad_norm": 2.1261818408966064, + "learning_rate": 1.0279146797765845e-06, + "loss": 0.1165, + "num_input_tokens_seen": 10106032, + "step": 20535 + }, + { + "epoch": 2.7108354229906295, + "grad_norm": 42.41480255126953, + "learning_rate": 1.0274541582353204e-06, + "loss": 0.1146, + "num_input_tokens_seen": 10108336, + "step": 20540 + }, + { + "epoch": 2.711495314768378, + "grad_norm": 0.36318209767341614, + "learning_rate": 1.0269936308671106e-06, + "loss": 0.0007, + "num_input_tokens_seen": 10111088, + "step": 20545 + }, + { + "epoch": 2.7121552065461265, + "grad_norm": 3.9034619331359863, + "learning_rate": 1.0265330977696977e-06, + "loss": 0.0595, + "num_input_tokens_seen": 10113584, + "step": 20550 + }, + { + "epoch": 2.712815098323875, + "grad_norm": 0.025046294555068016, + "learning_rate": 1.0260725590408273e-06, + "loss": 0.0, + "num_input_tokens_seen": 10116016, + "step": 20555 + }, + { + "epoch": 2.7134749901016235, + "grad_norm": 0.04526593163609505, + "learning_rate": 1.0256120147782445e-06, + "loss": 0.0612, + "num_input_tokens_seen": 10118768, + "step": 20560 + }, + { + "epoch": 2.714134881879372, + "grad_norm": 1.7549163103103638, + "learning_rate": 1.0251514650796975e-06, + "loss": 0.0013, + "num_input_tokens_seen": 10121008, + "step": 20565 + }, + { + "epoch": 2.71479477365712, + "grad_norm": 0.06778989732265472, + "learning_rate": 1.024690910042934e-06, + "loss": 0.003, + "num_input_tokens_seen": 10123760, + "step": 20570 + }, + { + "epoch": 2.715454665434869, + "grad_norm": 0.07780632376670837, + "learning_rate": 1.0242303497657038e-06, + "loss": 0.0534, + "num_input_tokens_seen": 10126128, + "step": 20575 + }, + { + "epoch": 2.716114557212617, + "grad_norm": 0.05785810574889183, + "learning_rate": 1.023769784345757e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10128560, + "step": 20580 + }, + { + "epoch": 2.716774448990366, + "grad_norm": 0.046384867280721664, + "learning_rate": 1.0233092138808457e-06, + "loss": 0.0412, + "num_input_tokens_seen": 10130992, + "step": 20585 + }, + { + "epoch": 2.717434340768114, + "grad_norm": 0.01017968449741602, + "learning_rate": 1.0228486384687226e-06, + "loss": 0.0032, + "num_input_tokens_seen": 10133744, + "step": 20590 + }, + { + "epoch": 2.7180942325458624, + "grad_norm": 0.09077580273151398, + "learning_rate": 1.0223880582071413e-06, + "loss": 0.0358, + "num_input_tokens_seen": 10136112, + "step": 20595 + }, + { + "epoch": 2.718754124323611, + "grad_norm": 0.011207741685211658, + "learning_rate": 1.0219274731938574e-06, + "loss": 0.0007, + "num_input_tokens_seen": 10138352, + "step": 20600 + }, + { + "epoch": 2.7194140161013594, + "grad_norm": 0.7007828950881958, + "learning_rate": 1.0214668835266255e-06, + "loss": 0.0695, + "num_input_tokens_seen": 10140720, + "step": 20605 + }, + { + "epoch": 2.720073907879108, + "grad_norm": 0.050733741372823715, + "learning_rate": 1.021006289303203e-06, + "loss": 0.0, + "num_input_tokens_seen": 10143024, + "step": 20610 + }, + { + "epoch": 2.7207337996568564, + "grad_norm": 0.011527138762176037, + "learning_rate": 1.020545690621348e-06, + "loss": 0.0383, + "num_input_tokens_seen": 10145456, + "step": 20615 + }, + { + "epoch": 2.7213936914346046, + "grad_norm": 0.3785325884819031, + "learning_rate": 1.0200850875788187e-06, + "loss": 0.0006, + "num_input_tokens_seen": 10147440, + "step": 20620 + }, + { + "epoch": 2.722053583212353, + "grad_norm": 0.1293078511953354, + "learning_rate": 1.0196244802733752e-06, + "loss": 0.0003, + "num_input_tokens_seen": 10149808, + "step": 20625 + }, + { + "epoch": 2.7227134749901016, + "grad_norm": 0.05859058350324631, + "learning_rate": 1.0191638688027777e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10152240, + "step": 20630 + }, + { + "epoch": 2.72337336676785, + "grad_norm": 0.00426831329241395, + "learning_rate": 1.0187032532647881e-06, + "loss": 0.0, + "num_input_tokens_seen": 10154800, + "step": 20635 + }, + { + "epoch": 2.7240332585455986, + "grad_norm": 0.01532171294093132, + "learning_rate": 1.018242633757168e-06, + "loss": 0.0, + "num_input_tokens_seen": 10157104, + "step": 20640 + }, + { + "epoch": 2.724693150323347, + "grad_norm": 22.842222213745117, + "learning_rate": 1.0177820103776814e-06, + "loss": 0.1595, + "num_input_tokens_seen": 10159728, + "step": 20645 + }, + { + "epoch": 2.725353042101095, + "grad_norm": 0.14298970997333527, + "learning_rate": 1.0173213832240918e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10162288, + "step": 20650 + }, + { + "epoch": 2.726012933878844, + "grad_norm": 0.03597806766629219, + "learning_rate": 1.0168607523941637e-06, + "loss": 0.0004, + "num_input_tokens_seen": 10164784, + "step": 20655 + }, + { + "epoch": 2.726672825656592, + "grad_norm": 0.059991996735334396, + "learning_rate": 1.0164001179856635e-06, + "loss": 0.0767, + "num_input_tokens_seen": 10167344, + "step": 20660 + }, + { + "epoch": 2.727332717434341, + "grad_norm": 0.013564787805080414, + "learning_rate": 1.0159394800963565e-06, + "loss": 0.0, + "num_input_tokens_seen": 10169968, + "step": 20665 + }, + { + "epoch": 2.727992609212089, + "grad_norm": 0.10042696446180344, + "learning_rate": 1.0154788388240105e-06, + "loss": 0.086, + "num_input_tokens_seen": 10172400, + "step": 20670 + }, + { + "epoch": 2.7286525009898375, + "grad_norm": 0.03980396315455437, + "learning_rate": 1.015018194266393e-06, + "loss": 0.0003, + "num_input_tokens_seen": 10174768, + "step": 20675 + }, + { + "epoch": 2.729312392767586, + "grad_norm": 0.02035425789654255, + "learning_rate": 1.0145575465212727e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10177136, + "step": 20680 + }, + { + "epoch": 2.7299722845453345, + "grad_norm": 18.365062713623047, + "learning_rate": 1.0140968956864186e-06, + "loss": 0.0355, + "num_input_tokens_seen": 10179312, + "step": 20685 + }, + { + "epoch": 2.730632176323083, + "grad_norm": 0.055683355778455734, + "learning_rate": 1.0136362418596004e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10181872, + "step": 20690 + }, + { + "epoch": 2.7312920681008315, + "grad_norm": 0.029079370200634003, + "learning_rate": 1.0131755851385883e-06, + "loss": 0.0874, + "num_input_tokens_seen": 10184240, + "step": 20695 + }, + { + "epoch": 2.7319519598785797, + "grad_norm": 0.0024721245281398296, + "learning_rate": 1.012714925621154e-06, + "loss": 0.0445, + "num_input_tokens_seen": 10186544, + "step": 20700 + }, + { + "epoch": 2.7326118516563285, + "grad_norm": 0.059845685958862305, + "learning_rate": 1.012254263405069e-06, + "loss": 0.0517, + "num_input_tokens_seen": 10189296, + "step": 20705 + }, + { + "epoch": 2.7332717434340768, + "grad_norm": 31.77506446838379, + "learning_rate": 1.0117935985881048e-06, + "loss": 0.08, + "num_input_tokens_seen": 10191984, + "step": 20710 + }, + { + "epoch": 2.7339316352118255, + "grad_norm": 0.00842016376554966, + "learning_rate": 1.0113329312680352e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10194608, + "step": 20715 + }, + { + "epoch": 2.7345915269895738, + "grad_norm": 0.16895176470279694, + "learning_rate": 1.0108722615426326e-06, + "loss": 0.0008, + "num_input_tokens_seen": 10197104, + "step": 20720 + }, + { + "epoch": 2.735251418767322, + "grad_norm": 0.0018568473169580102, + "learning_rate": 1.0104115895096715e-06, + "loss": 0.0003, + "num_input_tokens_seen": 10199536, + "step": 20725 + }, + { + "epoch": 2.7359113105450708, + "grad_norm": 0.16196206212043762, + "learning_rate": 1.0099509152669257e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10202096, + "step": 20730 + }, + { + "epoch": 2.736571202322819, + "grad_norm": 0.007333566900342703, + "learning_rate": 1.0094902389121702e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10204464, + "step": 20735 + }, + { + "epoch": 2.7372310941005678, + "grad_norm": 0.0005558169796131551, + "learning_rate": 1.0090295605431805e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10207024, + "step": 20740 + }, + { + "epoch": 2.737890985878316, + "grad_norm": 0.06266650557518005, + "learning_rate": 1.0085688802577315e-06, + "loss": 0.0, + "num_input_tokens_seen": 10209712, + "step": 20745 + }, + { + "epoch": 2.7385508776560643, + "grad_norm": 0.0023195738904178143, + "learning_rate": 1.0081081981536001e-06, + "loss": 0.0011, + "num_input_tokens_seen": 10212144, + "step": 20750 + }, + { + "epoch": 2.7392107694338126, + "grad_norm": 0.010359673760831356, + "learning_rate": 1.0076475143285623e-06, + "loss": 0.0938, + "num_input_tokens_seen": 10214832, + "step": 20755 + }, + { + "epoch": 2.7398706612115613, + "grad_norm": 0.051527973264455795, + "learning_rate": 1.0071868288803948e-06, + "loss": 0.0229, + "num_input_tokens_seen": 10217328, + "step": 20760 + }, + { + "epoch": 2.7405305529893096, + "grad_norm": 0.0074024563655257225, + "learning_rate": 1.006726141906875e-06, + "loss": 0.0003, + "num_input_tokens_seen": 10219696, + "step": 20765 + }, + { + "epoch": 2.7411904447670583, + "grad_norm": 0.0018134743440896273, + "learning_rate": 1.0062654535057805e-06, + "loss": 0.0504, + "num_input_tokens_seen": 10222064, + "step": 20770 + }, + { + "epoch": 2.7418503365448066, + "grad_norm": 27.99730682373047, + "learning_rate": 1.0058047637748886e-06, + "loss": 0.0955, + "num_input_tokens_seen": 10224752, + "step": 20775 + }, + { + "epoch": 2.742510228322555, + "grad_norm": 0.023540226742625237, + "learning_rate": 1.0053440728119778e-06, + "loss": 0.0611, + "num_input_tokens_seen": 10227248, + "step": 20780 + }, + { + "epoch": 2.7431701201003036, + "grad_norm": 0.04689393192529678, + "learning_rate": 1.0048833807148263e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10229744, + "step": 20785 + }, + { + "epoch": 2.743830011878052, + "grad_norm": 0.00846624094992876, + "learning_rate": 1.004422687581212e-06, + "loss": 0.0001, + "num_input_tokens_seen": 10232176, + "step": 20790 + }, + { + "epoch": 2.7444899036558006, + "grad_norm": 21.59259605407715, + "learning_rate": 1.0039619935089149e-06, + "loss": 0.179, + "num_input_tokens_seen": 10234608, + "step": 20795 + }, + { + "epoch": 2.745149795433549, + "grad_norm": 0.129641592502594, + "learning_rate": 1.0035012985957132e-06, + "loss": 0.0004, + "num_input_tokens_seen": 10237040, + "step": 20800 + }, + { + "epoch": 2.745809687211297, + "grad_norm": 0.11951775848865509, + "learning_rate": 1.0030406029393863e-06, + "loss": 0.0003, + "num_input_tokens_seen": 10239408, + "step": 20805 + }, + { + "epoch": 2.746469578989046, + "grad_norm": 0.00737581355497241, + "learning_rate": 1.0025799066377134e-06, + "loss": 0.1464, + "num_input_tokens_seen": 10241840, + "step": 20810 + }, + { + "epoch": 2.747129470766794, + "grad_norm": 0.12172198295593262, + "learning_rate": 1.0021192097884738e-06, + "loss": 0.0002, + "num_input_tokens_seen": 10244272, + "step": 20815 + }, + { + "epoch": 2.747789362544543, + "grad_norm": 0.1808333545923233, + "learning_rate": 1.0016585124894478e-06, + "loss": 0.0029, + "num_input_tokens_seen": 10246960, + "step": 20820 + }, + { + "epoch": 2.748449254322291, + "grad_norm": 1.3367594480514526, + "learning_rate": 1.0011978148384137e-06, + "loss": 0.0008, + "num_input_tokens_seen": 10249712, + "step": 20825 + }, + { + "epoch": 2.7491091461000394, + "grad_norm": 0.05913139134645462, + "learning_rate": 1.0007371169331527e-06, + "loss": 0.0627, + "num_input_tokens_seen": 10252400, + "step": 20830 + }, + { + "epoch": 2.749769037877788, + "grad_norm": 43.06571578979492, + "learning_rate": 1.0002764188714438e-06, + "loss": 0.0152, + "num_input_tokens_seen": 10255024, + "step": 20835 + }, + { + "epoch": 2.7504289296555364, + "grad_norm": 0.11080607026815414, + "learning_rate": 9.99815720751067e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10257392, + "step": 20840 + }, + { + "epoch": 2.751088821433285, + "grad_norm": 0.010921070352196693, + "learning_rate": 9.993550226698021e-07, + "loss": 0.0429, + "num_input_tokens_seen": 10259504, + "step": 20845 + }, + { + "epoch": 2.751088821433285, + "eval_loss": 0.16027498245239258, + "eval_runtime": 7.8273, + "eval_samples_per_second": 860.448, + "eval_steps_per_second": 107.572, + "num_input_tokens_seen": 10259504, + "step": 20845 + }, + { + "epoch": 2.7517487132110334, + "grad_norm": 0.03583168983459473, + "learning_rate": 9.988943247254293e-07, + "loss": 0.0397, + "num_input_tokens_seen": 10261808, + "step": 20850 + }, + { + "epoch": 2.7524086049887817, + "grad_norm": 0.18788190186023712, + "learning_rate": 9.984336270157277e-07, + "loss": 0.038, + "num_input_tokens_seen": 10264240, + "step": 20855 + }, + { + "epoch": 2.7530684967665304, + "grad_norm": 0.023686319589614868, + "learning_rate": 9.979729296384775e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10266736, + "step": 20860 + }, + { + "epoch": 2.7537283885442787, + "grad_norm": 39.64815902709961, + "learning_rate": 9.97512232691458e-07, + "loss": 0.2016, + "num_input_tokens_seen": 10269488, + "step": 20865 + }, + { + "epoch": 2.7543882803220274, + "grad_norm": 0.0550624318420887, + "learning_rate": 9.970515362724497e-07, + "loss": 0.0143, + "num_input_tokens_seen": 10271920, + "step": 20870 + }, + { + "epoch": 2.7550481720997757, + "grad_norm": 0.1452958583831787, + "learning_rate": 9.965908404792313e-07, + "loss": 0.1161, + "num_input_tokens_seen": 10274672, + "step": 20875 + }, + { + "epoch": 2.755708063877524, + "grad_norm": 0.05162237212061882, + "learning_rate": 9.96130145409582e-07, + "loss": 0.0524, + "num_input_tokens_seen": 10277424, + "step": 20880 + }, + { + "epoch": 2.7563679556552723, + "grad_norm": 0.006389949936419725, + "learning_rate": 9.956694511612817e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10279920, + "step": 20885 + }, + { + "epoch": 2.757027847433021, + "grad_norm": 5.551244735717773, + "learning_rate": 9.952087578321086e-07, + "loss": 0.0058, + "num_input_tokens_seen": 10282480, + "step": 20890 + }, + { + "epoch": 2.7576877392107697, + "grad_norm": 21.14126968383789, + "learning_rate": 9.947480655198423e-07, + "loss": 0.1002, + "num_input_tokens_seen": 10284976, + "step": 20895 + }, + { + "epoch": 2.758347630988518, + "grad_norm": 0.1591024398803711, + "learning_rate": 9.94287374322261e-07, + "loss": 0.0384, + "num_input_tokens_seen": 10287344, + "step": 20900 + }, + { + "epoch": 2.7590075227662663, + "grad_norm": 0.01715918444097042, + "learning_rate": 9.93826684337143e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10289648, + "step": 20905 + }, + { + "epoch": 2.7596674145440145, + "grad_norm": 0.01877519302070141, + "learning_rate": 9.933659956622668e-07, + "loss": 0.0342, + "num_input_tokens_seen": 10291952, + "step": 20910 + }, + { + "epoch": 2.7603273063217633, + "grad_norm": 10.56521987915039, + "learning_rate": 9.929053083954096e-07, + "loss": 0.0798, + "num_input_tokens_seen": 10294704, + "step": 20915 + }, + { + "epoch": 2.7609871980995115, + "grad_norm": 0.17608705163002014, + "learning_rate": 9.924446226343496e-07, + "loss": 0.0007, + "num_input_tokens_seen": 10297264, + "step": 20920 + }, + { + "epoch": 2.7616470898772603, + "grad_norm": 0.4646472632884979, + "learning_rate": 9.91983938476864e-07, + "loss": 0.077, + "num_input_tokens_seen": 10299312, + "step": 20925 + }, + { + "epoch": 2.7623069816550085, + "grad_norm": 22.721925735473633, + "learning_rate": 9.915232560207288e-07, + "loss": 0.0904, + "num_input_tokens_seen": 10301616, + "step": 20930 + }, + { + "epoch": 2.762966873432757, + "grad_norm": 0.05354856699705124, + "learning_rate": 9.910625753637215e-07, + "loss": 0.0017, + "num_input_tokens_seen": 10303984, + "step": 20935 + }, + { + "epoch": 2.7636267652105055, + "grad_norm": 0.7742305397987366, + "learning_rate": 9.906018966036177e-07, + "loss": 0.075, + "num_input_tokens_seen": 10306608, + "step": 20940 + }, + { + "epoch": 2.764286656988254, + "grad_norm": 0.024587344378232956, + "learning_rate": 9.901412198381935e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10309040, + "step": 20945 + }, + { + "epoch": 2.7649465487660025, + "grad_norm": 0.03141823783516884, + "learning_rate": 9.89680545165224e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10311280, + "step": 20950 + }, + { + "epoch": 2.765606440543751, + "grad_norm": 0.05810914188623428, + "learning_rate": 9.892198726824835e-07, + "loss": 0.0475, + "num_input_tokens_seen": 10313776, + "step": 20955 + }, + { + "epoch": 2.766266332321499, + "grad_norm": 14.158202171325684, + "learning_rate": 9.887592024877478e-07, + "loss": 0.0412, + "num_input_tokens_seen": 10316400, + "step": 20960 + }, + { + "epoch": 2.766926224099248, + "grad_norm": 0.1441594511270523, + "learning_rate": 9.882985346787892e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10319024, + "step": 20965 + }, + { + "epoch": 2.767586115876996, + "grad_norm": 0.030178377404808998, + "learning_rate": 9.878378693533825e-07, + "loss": 0.0068, + "num_input_tokens_seen": 10321584, + "step": 20970 + }, + { + "epoch": 2.768246007654745, + "grad_norm": 0.8287844657897949, + "learning_rate": 9.873772066092998e-07, + "loss": 0.0035, + "num_input_tokens_seen": 10323952, + "step": 20975 + }, + { + "epoch": 2.768905899432493, + "grad_norm": 0.029004383832216263, + "learning_rate": 9.869165465443132e-07, + "loss": 0.0556, + "num_input_tokens_seen": 10326384, + "step": 20980 + }, + { + "epoch": 2.7695657912102414, + "grad_norm": 0.0934871956706047, + "learning_rate": 9.864558892561955e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10328688, + "step": 20985 + }, + { + "epoch": 2.77022568298799, + "grad_norm": 17.891246795654297, + "learning_rate": 9.859952348427167e-07, + "loss": 0.0582, + "num_input_tokens_seen": 10331312, + "step": 20990 + }, + { + "epoch": 2.7708855747657384, + "grad_norm": 0.029909562319517136, + "learning_rate": 9.855345834016481e-07, + "loss": 0.0782, + "num_input_tokens_seen": 10333680, + "step": 20995 + }, + { + "epoch": 2.771545466543487, + "grad_norm": 0.11856421083211899, + "learning_rate": 9.850739350307595e-07, + "loss": 0.0438, + "num_input_tokens_seen": 10336240, + "step": 21000 + }, + { + "epoch": 2.7722053583212354, + "grad_norm": 0.02081143669784069, + "learning_rate": 9.846132898278198e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10338608, + "step": 21005 + }, + { + "epoch": 2.7728652500989837, + "grad_norm": 0.00258413958363235, + "learning_rate": 9.84152647890598e-07, + "loss": 0.1489, + "num_input_tokens_seen": 10341296, + "step": 21010 + }, + { + "epoch": 2.7735251418767324, + "grad_norm": 0.01568412408232689, + "learning_rate": 9.83692009316862e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10344048, + "step": 21015 + }, + { + "epoch": 2.7741850336544807, + "grad_norm": 0.010204672813415527, + "learning_rate": 9.832313742043792e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10346352, + "step": 21020 + }, + { + "epoch": 2.7748449254322294, + "grad_norm": 39.50893783569336, + "learning_rate": 9.827707426509155e-07, + "loss": 0.0472, + "num_input_tokens_seen": 10348784, + "step": 21025 + }, + { + "epoch": 2.7755048172099777, + "grad_norm": 0.0015126679791137576, + "learning_rate": 9.823101147542368e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10351344, + "step": 21030 + }, + { + "epoch": 2.776164708987726, + "grad_norm": 0.001049877842888236, + "learning_rate": 9.818494906121084e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10354032, + "step": 21035 + }, + { + "epoch": 2.776824600765474, + "grad_norm": 0.014882220886647701, + "learning_rate": 9.813888703222938e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10356656, + "step": 21040 + }, + { + "epoch": 2.777484492543223, + "grad_norm": 0.5062795281410217, + "learning_rate": 9.809282539825573e-07, + "loss": 0.0059, + "num_input_tokens_seen": 10359280, + "step": 21045 + }, + { + "epoch": 2.778144384320971, + "grad_norm": 23.332178115844727, + "learning_rate": 9.804676416906605e-07, + "loss": 0.0612, + "num_input_tokens_seen": 10361712, + "step": 21050 + }, + { + "epoch": 2.77880427609872, + "grad_norm": 0.014776087366044521, + "learning_rate": 9.800070335443651e-07, + "loss": 0.1814, + "num_input_tokens_seen": 10364400, + "step": 21055 + }, + { + "epoch": 2.779464167876468, + "grad_norm": 20.17340660095215, + "learning_rate": 9.795464296414323e-07, + "loss": 0.024, + "num_input_tokens_seen": 10367024, + "step": 21060 + }, + { + "epoch": 2.7801240596542165, + "grad_norm": 0.0953618586063385, + "learning_rate": 9.790858300796214e-07, + "loss": 0.0338, + "num_input_tokens_seen": 10369520, + "step": 21065 + }, + { + "epoch": 2.780783951431965, + "grad_norm": 0.0007482774672098458, + "learning_rate": 9.78625234956692e-07, + "loss": 0.004, + "num_input_tokens_seen": 10372336, + "step": 21070 + }, + { + "epoch": 2.7814438432097135, + "grad_norm": 29.122838973999023, + "learning_rate": 9.781646443704014e-07, + "loss": 0.0019, + "num_input_tokens_seen": 10375024, + "step": 21075 + }, + { + "epoch": 2.782103734987462, + "grad_norm": 0.10940604656934738, + "learning_rate": 9.777040584185072e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10377712, + "step": 21080 + }, + { + "epoch": 2.7827636267652105, + "grad_norm": 0.010563059709966183, + "learning_rate": 9.772434771987652e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10379952, + "step": 21085 + }, + { + "epoch": 2.7834235185429588, + "grad_norm": 0.3350667655467987, + "learning_rate": 9.7678290080893e-07, + "loss": 0.112, + "num_input_tokens_seen": 10382448, + "step": 21090 + }, + { + "epoch": 2.7840834103207075, + "grad_norm": 0.06816844642162323, + "learning_rate": 9.76322329346756e-07, + "loss": 0.0793, + "num_input_tokens_seen": 10384688, + "step": 21095 + }, + { + "epoch": 2.7847433020984558, + "grad_norm": 12.317327499389648, + "learning_rate": 9.758617629099961e-07, + "loss": 0.1091, + "num_input_tokens_seen": 10387120, + "step": 21100 + }, + { + "epoch": 2.7854031938762045, + "grad_norm": 0.005835865158587694, + "learning_rate": 9.754012015964027e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10389488, + "step": 21105 + }, + { + "epoch": 2.7860630856539528, + "grad_norm": 0.011800551787018776, + "learning_rate": 9.749406455037262e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10392048, + "step": 21110 + }, + { + "epoch": 2.786722977431701, + "grad_norm": 0.028174983337521553, + "learning_rate": 9.744800947297154e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10394416, + "step": 21115 + }, + { + "epoch": 2.7873828692094498, + "grad_norm": 0.036128319799900055, + "learning_rate": 9.740195493721204e-07, + "loss": 0.0831, + "num_input_tokens_seen": 10396912, + "step": 21120 + }, + { + "epoch": 2.788042760987198, + "grad_norm": 0.18786346912384033, + "learning_rate": 9.735590095286874e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10399280, + "step": 21125 + }, + { + "epoch": 2.7887026527649468, + "grad_norm": 0.5376497507095337, + "learning_rate": 9.730984752971634e-07, + "loss": 0.0007, + "num_input_tokens_seen": 10401968, + "step": 21130 + }, + { + "epoch": 2.789362544542695, + "grad_norm": 0.007869304157793522, + "learning_rate": 9.726379467752937e-07, + "loss": 0.0, + "num_input_tokens_seen": 10404912, + "step": 21135 + }, + { + "epoch": 2.7900224363204433, + "grad_norm": 0.02111724764108658, + "learning_rate": 9.721774240608208e-07, + "loss": 0.111, + "num_input_tokens_seen": 10407600, + "step": 21140 + }, + { + "epoch": 2.790682328098192, + "grad_norm": 0.016869166865944862, + "learning_rate": 9.71716907251489e-07, + "loss": 0.0037, + "num_input_tokens_seen": 10410096, + "step": 21145 + }, + { + "epoch": 2.7913422198759403, + "grad_norm": 0.8264169692993164, + "learning_rate": 9.712563964450378e-07, + "loss": 0.0089, + "num_input_tokens_seen": 10412720, + "step": 21150 + }, + { + "epoch": 2.792002111653689, + "grad_norm": 0.007358442526310682, + "learning_rate": 9.707958917392094e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10415088, + "step": 21155 + }, + { + "epoch": 2.7926620034314373, + "grad_norm": 15.352611541748047, + "learning_rate": 9.70335393231741e-07, + "loss": 0.0325, + "num_input_tokens_seen": 10417648, + "step": 21160 + }, + { + "epoch": 2.7933218952091856, + "grad_norm": 0.003411094658076763, + "learning_rate": 9.698749010203704e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10420016, + "step": 21165 + }, + { + "epoch": 2.793981786986934, + "grad_norm": 0.004778689704835415, + "learning_rate": 9.694144152028342e-07, + "loss": 0.0985, + "num_input_tokens_seen": 10422704, + "step": 21170 + }, + { + "epoch": 2.7946416787646826, + "grad_norm": 0.09724986553192139, + "learning_rate": 9.689539358768668e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10424752, + "step": 21175 + }, + { + "epoch": 2.795301570542431, + "grad_norm": 0.01130112074315548, + "learning_rate": 9.684934631402016e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10427312, + "step": 21180 + }, + { + "epoch": 2.7959614623201796, + "grad_norm": 0.010012752376496792, + "learning_rate": 9.68032997090571e-07, + "loss": 0.0, + "num_input_tokens_seen": 10429808, + "step": 21185 + }, + { + "epoch": 2.796621354097928, + "grad_norm": 0.005459084175527096, + "learning_rate": 9.675725378257047e-07, + "loss": 0.0, + "num_input_tokens_seen": 10432368, + "step": 21190 + }, + { + "epoch": 2.797281245875676, + "grad_norm": 0.09919703751802444, + "learning_rate": 9.67112085443333e-07, + "loss": 0.0902, + "num_input_tokens_seen": 10434672, + "step": 21195 + }, + { + "epoch": 2.797941137653425, + "grad_norm": 22.875263214111328, + "learning_rate": 9.666516400411826e-07, + "loss": 0.0641, + "num_input_tokens_seen": 10437168, + "step": 21200 + }, + { + "epoch": 2.798601029431173, + "grad_norm": 0.0013189826859161258, + "learning_rate": 9.661912017169803e-07, + "loss": 0.0, + "num_input_tokens_seen": 10439472, + "step": 21205 + }, + { + "epoch": 2.799260921208922, + "grad_norm": 0.0005493463831953704, + "learning_rate": 9.657307705684507e-07, + "loss": 0.0, + "num_input_tokens_seen": 10441840, + "step": 21210 + }, + { + "epoch": 2.79992081298667, + "grad_norm": 0.057411447167396545, + "learning_rate": 9.652703466933167e-07, + "loss": 0.0975, + "num_input_tokens_seen": 10444272, + "step": 21215 + }, + { + "epoch": 2.8005807047644184, + "grad_norm": 0.011182377114892006, + "learning_rate": 9.648099301893003e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10446832, + "step": 21220 + }, + { + "epoch": 2.801240596542167, + "grad_norm": 0.0030372650362551212, + "learning_rate": 9.643495211541212e-07, + "loss": 0.135, + "num_input_tokens_seen": 10449136, + "step": 21225 + }, + { + "epoch": 2.8019004883199154, + "grad_norm": 0.08348898589611053, + "learning_rate": 9.63889119685498e-07, + "loss": 0.0473, + "num_input_tokens_seen": 10451760, + "step": 21230 + }, + { + "epoch": 2.802560380097664, + "grad_norm": 0.0047517018392682076, + "learning_rate": 9.634287258811481e-07, + "loss": 0.0, + "num_input_tokens_seen": 10454000, + "step": 21235 + }, + { + "epoch": 2.8032202718754125, + "grad_norm": 0.04656297713518143, + "learning_rate": 9.62968339838786e-07, + "loss": 0.0016, + "num_input_tokens_seen": 10456304, + "step": 21240 + }, + { + "epoch": 2.8038801636531607, + "grad_norm": 0.08442453294992447, + "learning_rate": 9.625079616561256e-07, + "loss": 0.0027, + "num_input_tokens_seen": 10458800, + "step": 21245 + }, + { + "epoch": 2.8045400554309095, + "grad_norm": 0.0003304073470644653, + "learning_rate": 9.620475914308787e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10461232, + "step": 21250 + }, + { + "epoch": 2.8051999472086577, + "grad_norm": 1.3611310720443726, + "learning_rate": 9.615872292607559e-07, + "loss": 0.2071, + "num_input_tokens_seen": 10463536, + "step": 21255 + }, + { + "epoch": 2.8058598389864065, + "grad_norm": 25.757781982421875, + "learning_rate": 9.611268752434658e-07, + "loss": 0.2321, + "num_input_tokens_seen": 10465904, + "step": 21260 + }, + { + "epoch": 2.8065197307641547, + "grad_norm": 0.04057693853974342, + "learning_rate": 9.606665294767144e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10468272, + "step": 21265 + }, + { + "epoch": 2.807179622541903, + "grad_norm": 0.010599908418953419, + "learning_rate": 9.602061920582076e-07, + "loss": 0.0368, + "num_input_tokens_seen": 10470576, + "step": 21270 + }, + { + "epoch": 2.8078395143196517, + "grad_norm": 0.010033125057816505, + "learning_rate": 9.59745863085648e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10472944, + "step": 21275 + }, + { + "epoch": 2.8084994060974, + "grad_norm": 0.03106347844004631, + "learning_rate": 9.59285542656738e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10475248, + "step": 21280 + }, + { + "epoch": 2.8091592978751487, + "grad_norm": 13.835277557373047, + "learning_rate": 9.588252308691768e-07, + "loss": 0.0534, + "num_input_tokens_seen": 10477808, + "step": 21285 + }, + { + "epoch": 2.809819189652897, + "grad_norm": 0.7269652485847473, + "learning_rate": 9.583649278206616e-07, + "loss": 0.0402, + "num_input_tokens_seen": 10480176, + "step": 21290 + }, + { + "epoch": 2.8104790814306453, + "grad_norm": 0.0030855233781039715, + "learning_rate": 9.579046336088894e-07, + "loss": 0.0887, + "num_input_tokens_seen": 10482352, + "step": 21295 + }, + { + "epoch": 2.8111389732083936, + "grad_norm": 0.20838147401809692, + "learning_rate": 9.574443483315533e-07, + "loss": 0.2153, + "num_input_tokens_seen": 10484912, + "step": 21300 + }, + { + "epoch": 2.8117988649861423, + "grad_norm": 0.08771320432424545, + "learning_rate": 9.569840720863469e-07, + "loss": 0.2879, + "num_input_tokens_seen": 10487216, + "step": 21305 + }, + { + "epoch": 2.8124587567638906, + "grad_norm": 0.46458372473716736, + "learning_rate": 9.565238049709596e-07, + "loss": 0.0255, + "num_input_tokens_seen": 10489840, + "step": 21310 + }, + { + "epoch": 2.8131186485416393, + "grad_norm": 0.06488537788391113, + "learning_rate": 9.560635470830794e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10492272, + "step": 21315 + }, + { + "epoch": 2.8137785403193876, + "grad_norm": 38.66447830200195, + "learning_rate": 9.556032985203934e-07, + "loss": 0.0089, + "num_input_tokens_seen": 10494768, + "step": 21320 + }, + { + "epoch": 2.814438432097136, + "grad_norm": 0.5035339593887329, + "learning_rate": 9.551430593805854e-07, + "loss": 0.0758, + "num_input_tokens_seen": 10497328, + "step": 21325 + }, + { + "epoch": 2.8150983238748846, + "grad_norm": 71.4471664428711, + "learning_rate": 9.546828297613389e-07, + "loss": 0.0628, + "num_input_tokens_seen": 10500016, + "step": 21330 + }, + { + "epoch": 2.815758215652633, + "grad_norm": 0.12810802459716797, + "learning_rate": 9.542226097603335e-07, + "loss": 0.1013, + "num_input_tokens_seen": 10502448, + "step": 21335 + }, + { + "epoch": 2.8164181074303816, + "grad_norm": 0.014600202441215515, + "learning_rate": 9.537623994752473e-07, + "loss": 0.0005, + "num_input_tokens_seen": 10504944, + "step": 21340 + }, + { + "epoch": 2.81707799920813, + "grad_norm": 0.04110288992524147, + "learning_rate": 9.533021990037572e-07, + "loss": 0.0752, + "num_input_tokens_seen": 10507440, + "step": 21345 + }, + { + "epoch": 2.817737890985878, + "grad_norm": 0.030138636007905006, + "learning_rate": 9.52842008443537e-07, + "loss": 0.0179, + "num_input_tokens_seen": 10509680, + "step": 21350 + }, + { + "epoch": 2.818397782763627, + "grad_norm": 0.3603948652744293, + "learning_rate": 9.523818278922593e-07, + "loss": 0.002, + "num_input_tokens_seen": 10512112, + "step": 21355 + }, + { + "epoch": 2.819057674541375, + "grad_norm": 158.56179809570312, + "learning_rate": 9.519216574475937e-07, + "loss": 0.0933, + "num_input_tokens_seen": 10514480, + "step": 21360 + }, + { + "epoch": 2.819717566319124, + "grad_norm": 0.2659551799297333, + "learning_rate": 9.514614972072082e-07, + "loss": 0.0934, + "num_input_tokens_seen": 10517040, + "step": 21365 + }, + { + "epoch": 2.820377458096872, + "grad_norm": 0.053979117423295975, + "learning_rate": 9.510013472687683e-07, + "loss": 0.0255, + "num_input_tokens_seen": 10519600, + "step": 21370 + }, + { + "epoch": 2.8210373498746204, + "grad_norm": 0.17275747656822205, + "learning_rate": 9.505412077299377e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10522288, + "step": 21375 + }, + { + "epoch": 2.821697241652369, + "grad_norm": 0.17458002269268036, + "learning_rate": 9.500810786883776e-07, + "loss": 0.0009, + "num_input_tokens_seen": 10524976, + "step": 21380 + }, + { + "epoch": 2.8223571334301174, + "grad_norm": 0.04548027738928795, + "learning_rate": 9.496209602417472e-07, + "loss": 0.034, + "num_input_tokens_seen": 10527600, + "step": 21385 + }, + { + "epoch": 2.823017025207866, + "grad_norm": 0.009598941542208195, + "learning_rate": 9.49160852487703e-07, + "loss": 0.0767, + "num_input_tokens_seen": 10529968, + "step": 21390 + }, + { + "epoch": 2.8236769169856144, + "grad_norm": 0.37783247232437134, + "learning_rate": 9.487007555238997e-07, + "loss": 0.076, + "num_input_tokens_seen": 10532272, + "step": 21395 + }, + { + "epoch": 2.8243368087633627, + "grad_norm": 20.97260856628418, + "learning_rate": 9.482406694479895e-07, + "loss": 0.0648, + "num_input_tokens_seen": 10535152, + "step": 21400 + }, + { + "epoch": 2.8249967005411114, + "grad_norm": 21.591175079345703, + "learning_rate": 9.477805943576226e-07, + "loss": 0.1232, + "num_input_tokens_seen": 10537712, + "step": 21405 + }, + { + "epoch": 2.8256565923188597, + "grad_norm": 0.01995622180402279, + "learning_rate": 9.473205303504463e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10540016, + "step": 21410 + }, + { + "epoch": 2.8263164840966084, + "grad_norm": 18.156414031982422, + "learning_rate": 9.468604775241061e-07, + "loss": 0.1321, + "num_input_tokens_seen": 10542512, + "step": 21415 + }, + { + "epoch": 2.8269763758743567, + "grad_norm": 0.2256481796503067, + "learning_rate": 9.464004359762445e-07, + "loss": 0.0401, + "num_input_tokens_seen": 10545136, + "step": 21420 + }, + { + "epoch": 2.827636267652105, + "grad_norm": 0.026353495195508003, + "learning_rate": 9.459404058045023e-07, + "loss": 0.0018, + "num_input_tokens_seen": 10547760, + "step": 21425 + }, + { + "epoch": 2.8282961594298532, + "grad_norm": 0.08316987007856369, + "learning_rate": 9.454803871065176e-07, + "loss": 0.0399, + "num_input_tokens_seen": 10549936, + "step": 21430 + }, + { + "epoch": 2.828956051207602, + "grad_norm": 0.13066366314888, + "learning_rate": 9.450203799799258e-07, + "loss": 0.0011, + "num_input_tokens_seen": 10552176, + "step": 21435 + }, + { + "epoch": 2.8296159429853502, + "grad_norm": 0.5951442122459412, + "learning_rate": 9.445603845223603e-07, + "loss": 0.0415, + "num_input_tokens_seen": 10554736, + "step": 21440 + }, + { + "epoch": 2.830275834763099, + "grad_norm": 95.009521484375, + "learning_rate": 9.44100400831452e-07, + "loss": 0.1261, + "num_input_tokens_seen": 10557296, + "step": 21445 + }, + { + "epoch": 2.8309357265408472, + "grad_norm": 0.011956310831010342, + "learning_rate": 9.436404290048282e-07, + "loss": 0.0783, + "num_input_tokens_seen": 10559984, + "step": 21450 + }, + { + "epoch": 2.8315956183185955, + "grad_norm": 0.029880542308092117, + "learning_rate": 9.43180469140116e-07, + "loss": 0.1771, + "num_input_tokens_seen": 10562416, + "step": 21455 + }, + { + "epoch": 2.8322555100963442, + "grad_norm": 0.8779613971710205, + "learning_rate": 9.427205213349369e-07, + "loss": 0.0008, + "num_input_tokens_seen": 10564976, + "step": 21460 + }, + { + "epoch": 2.8329154018740925, + "grad_norm": 0.3202302157878876, + "learning_rate": 9.422605856869129e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10567728, + "step": 21465 + }, + { + "epoch": 2.8335752936518412, + "grad_norm": 0.02996434085071087, + "learning_rate": 9.418006622936618e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10570416, + "step": 21470 + }, + { + "epoch": 2.8342351854295895, + "grad_norm": 0.04730546846985817, + "learning_rate": 9.413407512527977e-07, + "loss": 0.1179, + "num_input_tokens_seen": 10572784, + "step": 21475 + }, + { + "epoch": 2.834895077207338, + "grad_norm": 0.042985014617443085, + "learning_rate": 9.408808526619352e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10575152, + "step": 21480 + }, + { + "epoch": 2.8355549689850865, + "grad_norm": 0.09430090337991714, + "learning_rate": 9.404209666186831e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10577648, + "step": 21485 + }, + { + "epoch": 2.836214860762835, + "grad_norm": 0.019693154841661453, + "learning_rate": 9.3996109322065e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10580208, + "step": 21490 + }, + { + "epoch": 2.8368747525405835, + "grad_norm": 0.06802091002464294, + "learning_rate": 9.395012325654398e-07, + "loss": 0.0355, + "num_input_tokens_seen": 10582512, + "step": 21495 + }, + { + "epoch": 2.837534644318332, + "grad_norm": 0.004724997561424971, + "learning_rate": 9.390413847506547e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10584944, + "step": 21500 + }, + { + "epoch": 2.83819453609608, + "grad_norm": 0.02163371816277504, + "learning_rate": 9.385815498738944e-07, + "loss": 0.049, + "num_input_tokens_seen": 10587248, + "step": 21505 + }, + { + "epoch": 2.838854427873829, + "grad_norm": 33.175628662109375, + "learning_rate": 9.381217280327552e-07, + "loss": 0.0809, + "num_input_tokens_seen": 10590000, + "step": 21510 + }, + { + "epoch": 2.839514319651577, + "grad_norm": 0.0046610478311777115, + "learning_rate": 9.376619193248314e-07, + "loss": 0.0, + "num_input_tokens_seen": 10592496, + "step": 21515 + }, + { + "epoch": 2.840174211429326, + "grad_norm": 0.015222937799990177, + "learning_rate": 9.372021238477138e-07, + "loss": 0.0767, + "num_input_tokens_seen": 10595184, + "step": 21520 + }, + { + "epoch": 2.840834103207074, + "grad_norm": 0.05140548199415207, + "learning_rate": 9.367423416989905e-07, + "loss": 0.001, + "num_input_tokens_seen": 10597552, + "step": 21525 + }, + { + "epoch": 2.8414939949848224, + "grad_norm": 0.018608352169394493, + "learning_rate": 9.362825729762472e-07, + "loss": 0.1518, + "num_input_tokens_seen": 10600240, + "step": 21530 + }, + { + "epoch": 2.842153886762571, + "grad_norm": 0.1203496903181076, + "learning_rate": 9.358228177770663e-07, + "loss": 0.1066, + "num_input_tokens_seen": 10602608, + "step": 21535 + }, + { + "epoch": 2.8428137785403194, + "grad_norm": 31.338871002197266, + "learning_rate": 9.353630761990276e-07, + "loss": 0.0717, + "num_input_tokens_seen": 10605104, + "step": 21540 + }, + { + "epoch": 2.843473670318068, + "grad_norm": 0.056929174810647964, + "learning_rate": 9.349033483397082e-07, + "loss": 0.0023, + "num_input_tokens_seen": 10607600, + "step": 21545 + }, + { + "epoch": 2.8441335620958164, + "grad_norm": 0.01509710494428873, + "learning_rate": 9.344436342966812e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10610160, + "step": 21550 + }, + { + "epoch": 2.8447934538735646, + "grad_norm": 0.029244285076856613, + "learning_rate": 9.339839341675185e-07, + "loss": 0.0421, + "num_input_tokens_seen": 10612400, + "step": 21555 + }, + { + "epoch": 2.845453345651313, + "grad_norm": 0.17152266204357147, + "learning_rate": 9.335242480497876e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10615088, + "step": 21560 + }, + { + "epoch": 2.8461132374290616, + "grad_norm": 0.416633278131485, + "learning_rate": 9.330645760410537e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10617648, + "step": 21565 + }, + { + "epoch": 2.8467731292068104, + "grad_norm": 0.05466151982545853, + "learning_rate": 9.326049182388789e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10620336, + "step": 21570 + }, + { + "epoch": 2.8474330209845586, + "grad_norm": 0.0032727643847465515, + "learning_rate": 9.32145274740822e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10622704, + "step": 21575 + }, + { + "epoch": 2.848092912762307, + "grad_norm": 0.0016575565095990896, + "learning_rate": 9.316856456444392e-07, + "loss": 0.0407, + "num_input_tokens_seen": 10625264, + "step": 21580 + }, + { + "epoch": 2.848752804540055, + "grad_norm": 0.077976755797863, + "learning_rate": 9.312260310472833e-07, + "loss": 0.0736, + "num_input_tokens_seen": 10628016, + "step": 21585 + }, + { + "epoch": 2.849412696317804, + "grad_norm": 0.012262091040611267, + "learning_rate": 9.307664310469046e-07, + "loss": 0.0009, + "num_input_tokens_seen": 10630384, + "step": 21590 + }, + { + "epoch": 2.850072588095552, + "grad_norm": 0.009740821085870266, + "learning_rate": 9.303068457408497e-07, + "loss": 0.0016, + "num_input_tokens_seen": 10632688, + "step": 21595 + }, + { + "epoch": 2.850732479873301, + "grad_norm": 0.004587103612720966, + "learning_rate": 9.298472752266615e-07, + "loss": 0.0518, + "num_input_tokens_seen": 10634800, + "step": 21600 + }, + { + "epoch": 2.851392371651049, + "grad_norm": 0.1813964694738388, + "learning_rate": 9.293877196018816e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10637104, + "step": 21605 + }, + { + "epoch": 2.8520522634287975, + "grad_norm": 0.0014675756683573127, + "learning_rate": 9.289281789640465e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10639408, + "step": 21610 + }, + { + "epoch": 2.852712155206546, + "grad_norm": 16.158531188964844, + "learning_rate": 9.28468653410691e-07, + "loss": 0.274, + "num_input_tokens_seen": 10641584, + "step": 21615 + }, + { + "epoch": 2.8533720469842945, + "grad_norm": 0.003586375620216131, + "learning_rate": 9.280091430393462e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10643888, + "step": 21620 + }, + { + "epoch": 2.854031938762043, + "grad_norm": 0.005891416687518358, + "learning_rate": 9.275496479475386e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10646512, + "step": 21625 + }, + { + "epoch": 2.8546918305397915, + "grad_norm": 0.7463214993476868, + "learning_rate": 9.270901682327945e-07, + "loss": 0.001, + "num_input_tokens_seen": 10648880, + "step": 21630 + }, + { + "epoch": 2.8553517223175398, + "grad_norm": 3.2659404277801514, + "learning_rate": 9.266307039926333e-07, + "loss": 0.0012, + "num_input_tokens_seen": 10651440, + "step": 21635 + }, + { + "epoch": 2.8560116140952885, + "grad_norm": 0.0117728216573596, + "learning_rate": 9.261712553245747e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10654000, + "step": 21640 + }, + { + "epoch": 2.8566715058730368, + "grad_norm": 49.505550384521484, + "learning_rate": 9.257118223261323e-07, + "loss": 0.202, + "num_input_tokens_seen": 10656560, + "step": 21645 + }, + { + "epoch": 2.8573313976507855, + "grad_norm": 14.688985824584961, + "learning_rate": 9.252524050948174e-07, + "loss": 0.0427, + "num_input_tokens_seen": 10658928, + "step": 21650 + }, + { + "epoch": 2.8579912894285338, + "grad_norm": 0.007706925738602877, + "learning_rate": 9.247930037281385e-07, + "loss": 0.0675, + "num_input_tokens_seen": 10661360, + "step": 21655 + }, + { + "epoch": 2.858651181206282, + "grad_norm": 0.06840179115533829, + "learning_rate": 9.243336183235995e-07, + "loss": 0.0338, + "num_input_tokens_seen": 10663920, + "step": 21660 + }, + { + "epoch": 2.8593110729840308, + "grad_norm": 0.018307015299797058, + "learning_rate": 9.238742489787027e-07, + "loss": 0.1112, + "num_input_tokens_seen": 10666416, + "step": 21665 + }, + { + "epoch": 2.859970964761779, + "grad_norm": 0.06790940463542938, + "learning_rate": 9.234148957909451e-07, + "loss": 0.0335, + "num_input_tokens_seen": 10668656, + "step": 21670 + }, + { + "epoch": 2.8606308565395278, + "grad_norm": 1.0796475410461426, + "learning_rate": 9.229555588578211e-07, + "loss": 0.1067, + "num_input_tokens_seen": 10671152, + "step": 21675 + }, + { + "epoch": 2.861290748317276, + "grad_norm": 0.013293218798935413, + "learning_rate": 9.22496238276822e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10673392, + "step": 21680 + }, + { + "epoch": 2.8619506400950243, + "grad_norm": 0.10564276576042175, + "learning_rate": 9.220369341454348e-07, + "loss": 0.0005, + "num_input_tokens_seen": 10675696, + "step": 21685 + }, + { + "epoch": 2.8626105318727726, + "grad_norm": 0.21768558025360107, + "learning_rate": 9.215776465611441e-07, + "loss": 0.0005, + "num_input_tokens_seen": 10677936, + "step": 21690 + }, + { + "epoch": 2.8632704236505213, + "grad_norm": 0.012424738146364689, + "learning_rate": 9.2111837562143e-07, + "loss": 0.0022, + "num_input_tokens_seen": 10680880, + "step": 21695 + }, + { + "epoch": 2.86393031542827, + "grad_norm": 0.011372094973921776, + "learning_rate": 9.206591214237692e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10683056, + "step": 21700 + }, + { + "epoch": 2.8645902072060183, + "grad_norm": 0.023209456354379654, + "learning_rate": 9.201998840656355e-07, + "loss": 0.1564, + "num_input_tokens_seen": 10685552, + "step": 21705 + }, + { + "epoch": 2.8652500989837666, + "grad_norm": 0.3390696942806244, + "learning_rate": 9.197406636444984e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10687728, + "step": 21710 + }, + { + "epoch": 2.865909990761515, + "grad_norm": 0.0022603331599384546, + "learning_rate": 9.192814602578245e-07, + "loss": 0.0323, + "num_input_tokens_seen": 10690352, + "step": 21715 + }, + { + "epoch": 2.8665698825392636, + "grad_norm": 0.12818442285060883, + "learning_rate": 9.188222740030759e-07, + "loss": 0.1149, + "num_input_tokens_seen": 10693168, + "step": 21720 + }, + { + "epoch": 2.867229774317012, + "grad_norm": 0.04778322950005531, + "learning_rate": 9.18363104977712e-07, + "loss": 0.0007, + "num_input_tokens_seen": 10695600, + "step": 21725 + }, + { + "epoch": 2.8678896660947606, + "grad_norm": 17.72599983215332, + "learning_rate": 9.179039532791879e-07, + "loss": 0.0816, + "num_input_tokens_seen": 10698032, + "step": 21730 + }, + { + "epoch": 2.868549557872509, + "grad_norm": 0.06735151261091232, + "learning_rate": 9.174448190049551e-07, + "loss": 0.0805, + "num_input_tokens_seen": 10700272, + "step": 21735 + }, + { + "epoch": 2.869209449650257, + "grad_norm": 281.5223083496094, + "learning_rate": 9.169857022524616e-07, + "loss": 0.0385, + "num_input_tokens_seen": 10702640, + "step": 21740 + }, + { + "epoch": 2.869869341428006, + "grad_norm": 17.493099212646484, + "learning_rate": 9.165266031191518e-07, + "loss": 0.1961, + "num_input_tokens_seen": 10705136, + "step": 21745 + }, + { + "epoch": 2.870529233205754, + "grad_norm": 0.27138352394104004, + "learning_rate": 9.160675217024659e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10707824, + "step": 21750 + }, + { + "epoch": 2.871189124983503, + "grad_norm": 33.14533233642578, + "learning_rate": 9.156084580998409e-07, + "loss": 0.1185, + "num_input_tokens_seen": 10710064, + "step": 21755 + }, + { + "epoch": 2.871849016761251, + "grad_norm": 14.331830978393555, + "learning_rate": 9.151494124087093e-07, + "loss": 0.0926, + "num_input_tokens_seen": 10712432, + "step": 21760 + }, + { + "epoch": 2.8725089085389994, + "grad_norm": 35.13714599609375, + "learning_rate": 9.146903847265008e-07, + "loss": 0.0872, + "num_input_tokens_seen": 10714672, + "step": 21765 + }, + { + "epoch": 2.873168800316748, + "grad_norm": 0.03220439702272415, + "learning_rate": 9.142313751506401e-07, + "loss": 0.0009, + "num_input_tokens_seen": 10716912, + "step": 21770 + }, + { + "epoch": 2.8738286920944964, + "grad_norm": 0.11148615926504135, + "learning_rate": 9.137723837785491e-07, + "loss": 0.0009, + "num_input_tokens_seen": 10719600, + "step": 21775 + }, + { + "epoch": 2.874488583872245, + "grad_norm": 0.7292298674583435, + "learning_rate": 9.133134107076455e-07, + "loss": 0.0015, + "num_input_tokens_seen": 10721904, + "step": 21780 + }, + { + "epoch": 2.8751484756499934, + "grad_norm": 0.5308613181114197, + "learning_rate": 9.12854456035342e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10724720, + "step": 21785 + }, + { + "epoch": 2.8758083674277417, + "grad_norm": 0.7017931938171387, + "learning_rate": 9.123955198590498e-07, + "loss": 0.0011, + "num_input_tokens_seen": 10727216, + "step": 21790 + }, + { + "epoch": 2.8764682592054904, + "grad_norm": 0.049583759158849716, + "learning_rate": 9.119366022761736e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10729648, + "step": 21795 + }, + { + "epoch": 2.8771281509832387, + "grad_norm": 0.0807613804936409, + "learning_rate": 9.114777033841162e-07, + "loss": 0.0005, + "num_input_tokens_seen": 10732016, + "step": 21800 + }, + { + "epoch": 2.8777880427609874, + "grad_norm": 0.036206115037202835, + "learning_rate": 9.110188232802756e-07, + "loss": 0.058, + "num_input_tokens_seen": 10734320, + "step": 21805 + }, + { + "epoch": 2.8784479345387357, + "grad_norm": 0.020139062777161598, + "learning_rate": 9.105599620620446e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10737008, + "step": 21810 + }, + { + "epoch": 2.879107826316484, + "grad_norm": 34.06182098388672, + "learning_rate": 9.101011198268146e-07, + "loss": 0.0938, + "num_input_tokens_seen": 10739632, + "step": 21815 + }, + { + "epoch": 2.8797677180942327, + "grad_norm": 0.03170971944928169, + "learning_rate": 9.096422966719704e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10742384, + "step": 21820 + }, + { + "epoch": 2.880427609871981, + "grad_norm": 0.017847511917352676, + "learning_rate": 9.091834926948949e-07, + "loss": 0.1499, + "num_input_tokens_seen": 10744880, + "step": 21825 + }, + { + "epoch": 2.8810875016497297, + "grad_norm": 0.019295165315270424, + "learning_rate": 9.087247079929654e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10747632, + "step": 21830 + }, + { + "epoch": 2.881747393427478, + "grad_norm": 0.028995471075177193, + "learning_rate": 9.082659426635554e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10750128, + "step": 21835 + }, + { + "epoch": 2.8824072852052263, + "grad_norm": 0.011393583379685879, + "learning_rate": 9.07807196804035e-07, + "loss": 0.1334, + "num_input_tokens_seen": 10752880, + "step": 21840 + }, + { + "epoch": 2.8830671769829745, + "grad_norm": 0.006427302956581116, + "learning_rate": 9.073484705117691e-07, + "loss": 0.1001, + "num_input_tokens_seen": 10755504, + "step": 21845 + }, + { + "epoch": 2.8837270687607233, + "grad_norm": 0.007785051595419645, + "learning_rate": 9.068897638841197e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10757808, + "step": 21850 + }, + { + "epoch": 2.8843869605384715, + "grad_norm": 0.005997618660330772, + "learning_rate": 9.064310770184438e-07, + "loss": 0.0008, + "num_input_tokens_seen": 10760432, + "step": 21855 + }, + { + "epoch": 2.8850468523162203, + "grad_norm": 0.1344568282365799, + "learning_rate": 9.059724100120939e-07, + "loss": 0.0008, + "num_input_tokens_seen": 10762864, + "step": 21860 + }, + { + "epoch": 2.8857067440939685, + "grad_norm": 0.0011155613465234637, + "learning_rate": 9.055137629624194e-07, + "loss": 0.0027, + "num_input_tokens_seen": 10765232, + "step": 21865 + }, + { + "epoch": 2.886366635871717, + "grad_norm": 0.02373356744647026, + "learning_rate": 9.05055135966764e-07, + "loss": 0.0253, + "num_input_tokens_seen": 10767600, + "step": 21870 + }, + { + "epoch": 2.8870265276494655, + "grad_norm": 0.0023866845294833183, + "learning_rate": 9.04596529122469e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10769712, + "step": 21875 + }, + { + "epoch": 2.887686419427214, + "grad_norm": 0.04158183932304382, + "learning_rate": 9.041379425268697e-07, + "loss": 0.0, + "num_input_tokens_seen": 10771952, + "step": 21880 + }, + { + "epoch": 2.8883463112049625, + "grad_norm": 0.058639414608478546, + "learning_rate": 9.036793762772977e-07, + "loss": 0.0676, + "num_input_tokens_seen": 10774512, + "step": 21885 + }, + { + "epoch": 2.889006202982711, + "grad_norm": 0.00817857775837183, + "learning_rate": 9.032208304710808e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10776944, + "step": 21890 + }, + { + "epoch": 2.889666094760459, + "grad_norm": 1.1033525466918945, + "learning_rate": 9.027623052055417e-07, + "loss": 0.0007, + "num_input_tokens_seen": 10779568, + "step": 21895 + }, + { + "epoch": 2.890325986538208, + "grad_norm": 0.013293600641191006, + "learning_rate": 9.023038005779992e-07, + "loss": 0.0041, + "num_input_tokens_seen": 10782512, + "step": 21900 + }, + { + "epoch": 2.890985878315956, + "grad_norm": 0.01595185324549675, + "learning_rate": 9.018453166857677e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10784816, + "step": 21905 + }, + { + "epoch": 2.891645770093705, + "grad_norm": 0.010223422199487686, + "learning_rate": 9.013868536261566e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10787504, + "step": 21910 + }, + { + "epoch": 2.892305661871453, + "grad_norm": 0.001329478225670755, + "learning_rate": 9.009284114964721e-07, + "loss": 0.0, + "num_input_tokens_seen": 10789936, + "step": 21915 + }, + { + "epoch": 2.8929655536492014, + "grad_norm": 0.10551278293132782, + "learning_rate": 9.004699903940146e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10792496, + "step": 21920 + }, + { + "epoch": 2.89362544542695, + "grad_norm": 0.007360459771007299, + "learning_rate": 9.000115904160811e-07, + "loss": 0.1505, + "num_input_tokens_seen": 10794864, + "step": 21925 + }, + { + "epoch": 2.8942853372046984, + "grad_norm": 1.597662091255188, + "learning_rate": 8.995532116599636e-07, + "loss": 0.0036, + "num_input_tokens_seen": 10797232, + "step": 21930 + }, + { + "epoch": 2.894945228982447, + "grad_norm": 0.012998878955841064, + "learning_rate": 8.99094854222949e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10799728, + "step": 21935 + }, + { + "epoch": 2.8956051207601954, + "grad_norm": 13.63510513305664, + "learning_rate": 8.986365182023212e-07, + "loss": 0.0537, + "num_input_tokens_seen": 10802224, + "step": 21940 + }, + { + "epoch": 2.8962650125379437, + "grad_norm": 118.9696273803711, + "learning_rate": 8.981782036953583e-07, + "loss": 0.0036, + "num_input_tokens_seen": 10804592, + "step": 21945 + }, + { + "epoch": 2.8969249043156924, + "grad_norm": 0.006649637129157782, + "learning_rate": 8.977199107993345e-07, + "loss": 0.0397, + "num_input_tokens_seen": 10806896, + "step": 21950 + }, + { + "epoch": 2.8975847960934407, + "grad_norm": 2.4541690349578857, + "learning_rate": 8.972616396115194e-07, + "loss": 0.0016, + "num_input_tokens_seen": 10809328, + "step": 21955 + }, + { + "epoch": 2.8982446878711894, + "grad_norm": 0.1047786995768547, + "learning_rate": 8.968033902291764e-07, + "loss": 0.0631, + "num_input_tokens_seen": 10811952, + "step": 21960 + }, + { + "epoch": 2.8989045796489377, + "grad_norm": 0.0030349986627697945, + "learning_rate": 8.963451627495673e-07, + "loss": 0.0689, + "num_input_tokens_seen": 10814256, + "step": 21965 + }, + { + "epoch": 2.899564471426686, + "grad_norm": 17.257110595703125, + "learning_rate": 8.95886957269946e-07, + "loss": 0.0617, + "num_input_tokens_seen": 10816624, + "step": 21970 + }, + { + "epoch": 2.900224363204434, + "grad_norm": 0.013636348769068718, + "learning_rate": 8.954287738875649e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10819184, + "step": 21975 + }, + { + "epoch": 2.900884254982183, + "grad_norm": 0.005858052987605333, + "learning_rate": 8.94970612699669e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10821616, + "step": 21980 + }, + { + "epoch": 2.901544146759931, + "grad_norm": 0.0023866884876042604, + "learning_rate": 8.945124738034998e-07, + "loss": 0.0308, + "num_input_tokens_seen": 10823920, + "step": 21985 + }, + { + "epoch": 2.90220403853768, + "grad_norm": 0.0054891835898160934, + "learning_rate": 8.940543572962944e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10826288, + "step": 21990 + }, + { + "epoch": 2.902863930315428, + "grad_norm": 0.02786225453019142, + "learning_rate": 8.93596263275284e-07, + "loss": 0.0, + "num_input_tokens_seen": 10828464, + "step": 21995 + }, + { + "epoch": 2.9035238220931765, + "grad_norm": 38.37060546875, + "learning_rate": 8.931381918376969e-07, + "loss": 0.1661, + "num_input_tokens_seen": 10830960, + "step": 22000 + }, + { + "epoch": 2.904183713870925, + "grad_norm": 0.0023783042561262846, + "learning_rate": 8.926801430807545e-07, + "loss": 0.0767, + "num_input_tokens_seen": 10833136, + "step": 22005 + }, + { + "epoch": 2.9048436056486735, + "grad_norm": 0.024528319016098976, + "learning_rate": 8.922221171016744e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10835632, + "step": 22010 + }, + { + "epoch": 2.905503497426422, + "grad_norm": 0.10498657077550888, + "learning_rate": 8.917641139976697e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10838064, + "step": 22015 + }, + { + "epoch": 2.9061633892041705, + "grad_norm": 0.03159989044070244, + "learning_rate": 8.913061338659478e-07, + "loss": 0.0798, + "num_input_tokens_seen": 10840368, + "step": 22020 + }, + { + "epoch": 2.9068232809819188, + "grad_norm": 175.38629150390625, + "learning_rate": 8.908481768037119e-07, + "loss": 0.0282, + "num_input_tokens_seen": 10842800, + "step": 22025 + }, + { + "epoch": 2.9074831727596675, + "grad_norm": 0.013397633098065853, + "learning_rate": 8.903902429081603e-07, + "loss": 0.0005, + "num_input_tokens_seen": 10845424, + "step": 22030 + }, + { + "epoch": 2.908143064537416, + "grad_norm": 0.12170317769050598, + "learning_rate": 8.899323322764857e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10847920, + "step": 22035 + }, + { + "epoch": 2.9088029563151645, + "grad_norm": 0.0028501616325229406, + "learning_rate": 8.894744450058767e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10850160, + "step": 22040 + }, + { + "epoch": 2.909462848092913, + "grad_norm": 0.14192311465740204, + "learning_rate": 8.890165811935161e-07, + "loss": 0.0661, + "num_input_tokens_seen": 10852400, + "step": 22045 + }, + { + "epoch": 2.910122739870661, + "grad_norm": 0.051709212362766266, + "learning_rate": 8.885587409365826e-07, + "loss": 0.1177, + "num_input_tokens_seen": 10854832, + "step": 22050 + }, + { + "epoch": 2.91078263164841, + "grad_norm": 0.2706301808357239, + "learning_rate": 8.881009243322493e-07, + "loss": 0.0018, + "num_input_tokens_seen": 10857456, + "step": 22055 + }, + { + "epoch": 2.911442523426158, + "grad_norm": 1.0978302955627441, + "learning_rate": 8.876431314776847e-07, + "loss": 0.0011, + "num_input_tokens_seen": 10860080, + "step": 22060 + }, + { + "epoch": 2.912102415203907, + "grad_norm": 0.018388278782367706, + "learning_rate": 8.871853624700517e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10862640, + "step": 22065 + }, + { + "epoch": 2.912762306981655, + "grad_norm": 0.24191296100616455, + "learning_rate": 8.867276174065085e-07, + "loss": 0.0494, + "num_input_tokens_seen": 10864880, + "step": 22070 + }, + { + "epoch": 2.9134221987594033, + "grad_norm": 0.009454427286982536, + "learning_rate": 8.862698963842084e-07, + "loss": 0.0501, + "num_input_tokens_seen": 10867056, + "step": 22075 + }, + { + "epoch": 2.914082090537152, + "grad_norm": 0.0265056062489748, + "learning_rate": 8.85812199500299e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10869424, + "step": 22080 + }, + { + "epoch": 2.9147419823149003, + "grad_norm": 0.03988128900527954, + "learning_rate": 8.853545268519235e-07, + "loss": 0.0876, + "num_input_tokens_seen": 10871984, + "step": 22085 + }, + { + "epoch": 2.915401874092649, + "grad_norm": 0.004546268377453089, + "learning_rate": 8.848968785362196e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10874352, + "step": 22090 + }, + { + "epoch": 2.9160617658703973, + "grad_norm": 0.913521945476532, + "learning_rate": 8.844392546503195e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10876592, + "step": 22095 + }, + { + "epoch": 2.9167216576481456, + "grad_norm": 0.00627158721908927, + "learning_rate": 8.83981655291351e-07, + "loss": 0.0537, + "num_input_tokens_seen": 10879024, + "step": 22100 + }, + { + "epoch": 2.917381549425894, + "grad_norm": 0.2408558577299118, + "learning_rate": 8.835240805564358e-07, + "loss": 0.0757, + "num_input_tokens_seen": 10881584, + "step": 22105 + }, + { + "epoch": 2.9180414412036426, + "grad_norm": 0.002517815912142396, + "learning_rate": 8.830665305426914e-07, + "loss": 0.0, + "num_input_tokens_seen": 10884144, + "step": 22110 + }, + { + "epoch": 2.918701332981391, + "grad_norm": 0.01214515045285225, + "learning_rate": 8.826090053472291e-07, + "loss": 0.1362, + "num_input_tokens_seen": 10886832, + "step": 22115 + }, + { + "epoch": 2.9193612247591396, + "grad_norm": 0.02185744419693947, + "learning_rate": 8.821515050671547e-07, + "loss": 0.0011, + "num_input_tokens_seen": 10888880, + "step": 22120 + }, + { + "epoch": 2.920021116536888, + "grad_norm": 0.04342570900917053, + "learning_rate": 8.816940297995705e-07, + "loss": 0.0005, + "num_input_tokens_seen": 10891248, + "step": 22125 + }, + { + "epoch": 2.920681008314636, + "grad_norm": 0.005738275591284037, + "learning_rate": 8.812365796415715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10893552, + "step": 22130 + }, + { + "epoch": 2.921340900092385, + "grad_norm": 0.03579311445355415, + "learning_rate": 8.807791546902488e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10895920, + "step": 22135 + }, + { + "epoch": 2.922000791870133, + "grad_norm": 0.00643147761002183, + "learning_rate": 8.803217550426873e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10898608, + "step": 22140 + }, + { + "epoch": 2.922660683647882, + "grad_norm": 0.001240309327840805, + "learning_rate": 8.79864380795966e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10901040, + "step": 22145 + }, + { + "epoch": 2.92332057542563, + "grad_norm": 0.5274344086647034, + "learning_rate": 8.794070320471605e-07, + "loss": 0.094, + "num_input_tokens_seen": 10903536, + "step": 22150 + }, + { + "epoch": 2.9239804672033785, + "grad_norm": 0.03138048201799393, + "learning_rate": 8.789497088933386e-07, + "loss": 0.1084, + "num_input_tokens_seen": 10905968, + "step": 22155 + }, + { + "epoch": 2.924640358981127, + "grad_norm": 0.14140108227729797, + "learning_rate": 8.78492411431565e-07, + "loss": 0.1881, + "num_input_tokens_seen": 10908656, + "step": 22160 + }, + { + "epoch": 2.9253002507588755, + "grad_norm": 0.0537065826356411, + "learning_rate": 8.78035139758897e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10910960, + "step": 22165 + }, + { + "epoch": 2.925960142536624, + "grad_norm": 0.031515542417764664, + "learning_rate": 8.775778939723874e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10913584, + "step": 22170 + }, + { + "epoch": 2.9266200343143725, + "grad_norm": 0.07926079630851746, + "learning_rate": 8.771206741690832e-07, + "loss": 0.0009, + "num_input_tokens_seen": 10916208, + "step": 22175 + }, + { + "epoch": 2.9272799260921207, + "grad_norm": 0.088646300137043, + "learning_rate": 8.76663480446026e-07, + "loss": 0.0003, + "num_input_tokens_seen": 10918704, + "step": 22180 + }, + { + "epoch": 2.9279398178698695, + "grad_norm": 0.018740274012088776, + "learning_rate": 8.762063129002521e-07, + "loss": 0.0007, + "num_input_tokens_seen": 10921200, + "step": 22185 + }, + { + "epoch": 2.9285997096476177, + "grad_norm": 0.17317558825016022, + "learning_rate": 8.757491716287919e-07, + "loss": 0.0567, + "num_input_tokens_seen": 10923568, + "step": 22190 + }, + { + "epoch": 2.9292596014253665, + "grad_norm": 0.17912831902503967, + "learning_rate": 8.752920567286701e-07, + "loss": 0.0356, + "num_input_tokens_seen": 10925872, + "step": 22195 + }, + { + "epoch": 2.9299194932031147, + "grad_norm": 2.7432761192321777, + "learning_rate": 8.748349682969063e-07, + "loss": 0.0273, + "num_input_tokens_seen": 10928496, + "step": 22200 + }, + { + "epoch": 2.930579384980863, + "grad_norm": 0.005701547488570213, + "learning_rate": 8.743779064305139e-07, + "loss": 0.0001, + "num_input_tokens_seen": 10930672, + "step": 22205 + }, + { + "epoch": 2.9312392767586117, + "grad_norm": 0.06518379598855972, + "learning_rate": 8.739208712265015e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10933168, + "step": 22210 + }, + { + "epoch": 2.93189916853636, + "grad_norm": 0.413714736700058, + "learning_rate": 8.734638627818711e-07, + "loss": 0.1551, + "num_input_tokens_seen": 10935472, + "step": 22215 + }, + { + "epoch": 2.9325590603141087, + "grad_norm": 0.017388276755809784, + "learning_rate": 8.730068811936194e-07, + "loss": 0.1055, + "num_input_tokens_seen": 10938288, + "step": 22220 + }, + { + "epoch": 2.933218952091857, + "grad_norm": 0.05265603959560394, + "learning_rate": 8.725499265587376e-07, + "loss": 0.052, + "num_input_tokens_seen": 10940592, + "step": 22225 + }, + { + "epoch": 2.9338788438696053, + "grad_norm": 19.1118221282959, + "learning_rate": 8.720929989742108e-07, + "loss": 0.0675, + "num_input_tokens_seen": 10942832, + "step": 22230 + }, + { + "epoch": 2.9345387356473536, + "grad_norm": 0.4165602922439575, + "learning_rate": 8.71636098537019e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10945456, + "step": 22235 + }, + { + "epoch": 2.9351986274251023, + "grad_norm": 0.017717348411679268, + "learning_rate": 8.711792253441358e-07, + "loss": 0.001, + "num_input_tokens_seen": 10947952, + "step": 22240 + }, + { + "epoch": 2.9358585192028506, + "grad_norm": 0.06295906007289886, + "learning_rate": 8.70722379492529e-07, + "loss": 0.0002, + "num_input_tokens_seen": 10950512, + "step": 22245 + }, + { + "epoch": 2.9365184109805993, + "grad_norm": 0.08168453723192215, + "learning_rate": 8.70265561079161e-07, + "loss": 0.0006, + "num_input_tokens_seen": 10953136, + "step": 22250 + }, + { + "epoch": 2.9371783027583476, + "grad_norm": 0.019058220088481903, + "learning_rate": 8.698087702009882e-07, + "loss": 0.0665, + "num_input_tokens_seen": 10955440, + "step": 22255 + }, + { + "epoch": 2.937838194536096, + "grad_norm": 0.013230009004473686, + "learning_rate": 8.693520069549612e-07, + "loss": 0.0892, + "num_input_tokens_seen": 10958064, + "step": 22260 + }, + { + "epoch": 2.9384980863138446, + "grad_norm": 0.07534804940223694, + "learning_rate": 8.688952714380247e-07, + "loss": 0.0043, + "num_input_tokens_seen": 10960688, + "step": 22265 + }, + { + "epoch": 2.939157978091593, + "grad_norm": 0.040853142738342285, + "learning_rate": 8.684385637471173e-07, + "loss": 0.0382, + "num_input_tokens_seen": 10963120, + "step": 22270 + }, + { + "epoch": 2.9398178698693416, + "grad_norm": 33.396121978759766, + "learning_rate": 8.679818839791721e-07, + "loss": 0.3035, + "num_input_tokens_seen": 10965616, + "step": 22275 + }, + { + "epoch": 2.94047776164709, + "grad_norm": 0.020552687346935272, + "learning_rate": 8.675252322311161e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10968112, + "step": 22280 + }, + { + "epoch": 2.941137653424838, + "grad_norm": 0.04607531800866127, + "learning_rate": 8.670686085998702e-07, + "loss": 0.0023, + "num_input_tokens_seen": 10970544, + "step": 22285 + }, + { + "epoch": 2.941797545202587, + "grad_norm": 0.4696016013622284, + "learning_rate": 8.666120131823499e-07, + "loss": 0.0015, + "num_input_tokens_seen": 10973040, + "step": 22290 + }, + { + "epoch": 2.942457436980335, + "grad_norm": 0.18232782185077667, + "learning_rate": 8.661554460754631e-07, + "loss": 0.1724, + "num_input_tokens_seen": 10975792, + "step": 22295 + }, + { + "epoch": 2.943117328758084, + "grad_norm": 0.33755701780319214, + "learning_rate": 8.656989073761144e-07, + "loss": 0.1864, + "num_input_tokens_seen": 10978416, + "step": 22300 + }, + { + "epoch": 2.943777220535832, + "grad_norm": 0.031122563406825066, + "learning_rate": 8.652423971811992e-07, + "loss": 0.0476, + "num_input_tokens_seen": 10981296, + "step": 22305 + }, + { + "epoch": 2.9444371123135804, + "grad_norm": 36.23896408081055, + "learning_rate": 8.647859155876103e-07, + "loss": 0.1013, + "num_input_tokens_seen": 10983728, + "step": 22310 + }, + { + "epoch": 2.945097004091329, + "grad_norm": 0.04262983053922653, + "learning_rate": 8.643294626922314e-07, + "loss": 0.0019, + "num_input_tokens_seen": 10986352, + "step": 22315 + }, + { + "epoch": 2.9457568958690774, + "grad_norm": 0.02900772914290428, + "learning_rate": 8.638730385919411e-07, + "loss": 0.0014, + "num_input_tokens_seen": 10989040, + "step": 22320 + }, + { + "epoch": 2.946416787646826, + "grad_norm": 0.12718896567821503, + "learning_rate": 8.634166433836132e-07, + "loss": 0.0004, + "num_input_tokens_seen": 10991344, + "step": 22325 + }, + { + "epoch": 2.9470766794245744, + "grad_norm": 636.110595703125, + "learning_rate": 8.629602771641131e-07, + "loss": 0.0746, + "num_input_tokens_seen": 10993712, + "step": 22330 + }, + { + "epoch": 2.9477365712023227, + "grad_norm": 0.0386221781373024, + "learning_rate": 8.625039400303025e-07, + "loss": 0.1069, + "num_input_tokens_seen": 10996016, + "step": 22335 + }, + { + "epoch": 2.9483964629800714, + "grad_norm": 0.06237388774752617, + "learning_rate": 8.620476320790346e-07, + "loss": 0.1457, + "num_input_tokens_seen": 10998512, + "step": 22340 + }, + { + "epoch": 2.9490563547578197, + "grad_norm": 0.05534980446100235, + "learning_rate": 8.615913534071577e-07, + "loss": 0.0385, + "num_input_tokens_seen": 11001200, + "step": 22345 + }, + { + "epoch": 2.9497162465355684, + "grad_norm": 0.583026111125946, + "learning_rate": 8.61135104111514e-07, + "loss": 0.0011, + "num_input_tokens_seen": 11003632, + "step": 22350 + }, + { + "epoch": 2.9503761383133167, + "grad_norm": 0.04361052066087723, + "learning_rate": 8.606788842889387e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11005936, + "step": 22355 + }, + { + "epoch": 2.951036030091065, + "grad_norm": 0.18426087498664856, + "learning_rate": 8.602226940362615e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11008624, + "step": 22360 + }, + { + "epoch": 2.9516959218688132, + "grad_norm": 0.02046630159020424, + "learning_rate": 8.59766533450305e-07, + "loss": 0.0893, + "num_input_tokens_seen": 11010992, + "step": 22365 + }, + { + "epoch": 2.952355813646562, + "grad_norm": 0.22573372721672058, + "learning_rate": 8.593104026278866e-07, + "loss": 0.2024, + "num_input_tokens_seen": 11013680, + "step": 22370 + }, + { + "epoch": 2.9530157054243107, + "grad_norm": 0.07988610863685608, + "learning_rate": 8.588543016658164e-07, + "loss": 0.0256, + "num_input_tokens_seen": 11015984, + "step": 22375 + }, + { + "epoch": 2.953675597202059, + "grad_norm": 0.049433253705501556, + "learning_rate": 8.583982306608984e-07, + "loss": 0.0559, + "num_input_tokens_seen": 11018224, + "step": 22380 + }, + { + "epoch": 2.9543354889798072, + "grad_norm": 0.29679155349731445, + "learning_rate": 8.579421897099307e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11020528, + "step": 22385 + }, + { + "epoch": 2.9549953807575555, + "grad_norm": 0.663558304309845, + "learning_rate": 8.574861789097043e-07, + "loss": 0.0417, + "num_input_tokens_seen": 11023088, + "step": 22390 + }, + { + "epoch": 2.9556552725353042, + "grad_norm": 1.3020586967468262, + "learning_rate": 8.570301983570048e-07, + "loss": 0.0681, + "num_input_tokens_seen": 11025840, + "step": 22395 + }, + { + "epoch": 2.9563151643130525, + "grad_norm": 19.917871475219727, + "learning_rate": 8.565742481486102e-07, + "loss": 0.0693, + "num_input_tokens_seen": 11028016, + "step": 22400 + }, + { + "epoch": 2.9569750560908012, + "grad_norm": 0.037922654300928116, + "learning_rate": 8.561183283812928e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11030192, + "step": 22405 + }, + { + "epoch": 2.9576349478685495, + "grad_norm": 0.06563448905944824, + "learning_rate": 8.556624391518182e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11032688, + "step": 22410 + }, + { + "epoch": 2.958294839646298, + "grad_norm": 0.048501260578632355, + "learning_rate": 8.552065805569457e-07, + "loss": 0.0344, + "num_input_tokens_seen": 11035248, + "step": 22415 + }, + { + "epoch": 2.9589547314240465, + "grad_norm": 0.046624649316072464, + "learning_rate": 8.547507526934281e-07, + "loss": 0.0848, + "num_input_tokens_seen": 11037808, + "step": 22420 + }, + { + "epoch": 2.959614623201795, + "grad_norm": 0.2368771731853485, + "learning_rate": 8.542949556580114e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11039984, + "step": 22425 + }, + { + "epoch": 2.9602745149795435, + "grad_norm": 0.5295007824897766, + "learning_rate": 8.538391895474353e-07, + "loss": 0.0662, + "num_input_tokens_seen": 11042544, + "step": 22430 + }, + { + "epoch": 2.960934406757292, + "grad_norm": 0.8100051283836365, + "learning_rate": 8.533834544584327e-07, + "loss": 0.0357, + "num_input_tokens_seen": 11045168, + "step": 22435 + }, + { + "epoch": 2.96159429853504, + "grad_norm": 0.031259916722774506, + "learning_rate": 8.529277504877301e-07, + "loss": 0.043, + "num_input_tokens_seen": 11047792, + "step": 22440 + }, + { + "epoch": 2.962254190312789, + "grad_norm": 23.24228286743164, + "learning_rate": 8.524720777320476e-07, + "loss": 0.1004, + "num_input_tokens_seen": 11050160, + "step": 22445 + }, + { + "epoch": 2.962914082090537, + "grad_norm": 0.4114581346511841, + "learning_rate": 8.520164362880986e-07, + "loss": 0.0013, + "num_input_tokens_seen": 11052720, + "step": 22450 + }, + { + "epoch": 2.963573973868286, + "grad_norm": 0.01881307363510132, + "learning_rate": 8.515608262525886e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11054960, + "step": 22455 + }, + { + "epoch": 2.964233865646034, + "grad_norm": 0.022714700549840927, + "learning_rate": 8.511052477222189e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11057712, + "step": 22460 + }, + { + "epoch": 2.9648937574237824, + "grad_norm": 0.011891757138073444, + "learning_rate": 8.50649700793682e-07, + "loss": 0.0526, + "num_input_tokens_seen": 11060400, + "step": 22465 + }, + { + "epoch": 2.965553649201531, + "grad_norm": 0.012211556546390057, + "learning_rate": 8.501941855636645e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11063152, + "step": 22470 + }, + { + "epoch": 2.9662135409792794, + "grad_norm": 0.08262446522712708, + "learning_rate": 8.497387021288468e-07, + "loss": 0.0368, + "num_input_tokens_seen": 11065904, + "step": 22475 + }, + { + "epoch": 2.966873432757028, + "grad_norm": 0.07934442162513733, + "learning_rate": 8.492832505859007e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11068272, + "step": 22480 + }, + { + "epoch": 2.9675333245347764, + "grad_norm": 0.05161631479859352, + "learning_rate": 8.488278310314939e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11070640, + "step": 22485 + }, + { + "epoch": 2.9681932163125246, + "grad_norm": 0.010977809317409992, + "learning_rate": 8.483724435622847e-07, + "loss": 0.0015, + "num_input_tokens_seen": 11073136, + "step": 22490 + }, + { + "epoch": 2.968853108090273, + "grad_norm": 0.007353820372372866, + "learning_rate": 8.479170882749269e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11075888, + "step": 22495 + }, + { + "epoch": 2.9695129998680216, + "grad_norm": 0.6406528949737549, + "learning_rate": 8.474617652660657e-07, + "loss": 0.0995, + "num_input_tokens_seen": 11078448, + "step": 22500 + }, + { + "epoch": 2.9701728916457704, + "grad_norm": 0.007448363117873669, + "learning_rate": 8.470064746323399e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11081072, + "step": 22505 + }, + { + "epoch": 2.9708327834235186, + "grad_norm": 0.01633693464100361, + "learning_rate": 8.465512164703823e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11083504, + "step": 22510 + }, + { + "epoch": 2.971492675201267, + "grad_norm": 0.002468445338308811, + "learning_rate": 8.460959908768173e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11085872, + "step": 22515 + }, + { + "epoch": 2.972152566979015, + "grad_norm": 0.00488969637081027, + "learning_rate": 8.456407979482645e-07, + "loss": 0.0751, + "num_input_tokens_seen": 11088368, + "step": 22520 + }, + { + "epoch": 2.972812458756764, + "grad_norm": 0.05592549219727516, + "learning_rate": 8.451856377813342e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11090864, + "step": 22525 + }, + { + "epoch": 2.973472350534512, + "grad_norm": 0.007001897785812616, + "learning_rate": 8.44730510472631e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11093488, + "step": 22530 + }, + { + "epoch": 2.974132242312261, + "grad_norm": 0.1100287213921547, + "learning_rate": 8.442754161187528e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11095792, + "step": 22535 + }, + { + "epoch": 2.974792134090009, + "grad_norm": 0.005934002343565226, + "learning_rate": 8.438203548162898e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11098288, + "step": 22540 + }, + { + "epoch": 2.9754520258677575, + "grad_norm": 0.013678577728569508, + "learning_rate": 8.433653266618255e-07, + "loss": 0.0257, + "num_input_tokens_seen": 11100528, + "step": 22545 + }, + { + "epoch": 2.976111917645506, + "grad_norm": 0.004507725592702627, + "learning_rate": 8.429103317519366e-07, + "loss": 0.0707, + "num_input_tokens_seen": 11103152, + "step": 22550 + }, + { + "epoch": 2.9767718094232545, + "grad_norm": 0.06873472779989243, + "learning_rate": 8.424553701831919e-07, + "loss": 0.0073, + "num_input_tokens_seen": 11105840, + "step": 22555 + }, + { + "epoch": 2.977431701201003, + "grad_norm": 0.006153427064418793, + "learning_rate": 8.420004420521542e-07, + "loss": 0.0646, + "num_input_tokens_seen": 11107952, + "step": 22560 + }, + { + "epoch": 2.9780915929787515, + "grad_norm": 0.004110193345695734, + "learning_rate": 8.415455474553784e-07, + "loss": 0.0, + "num_input_tokens_seen": 11110384, + "step": 22565 + }, + { + "epoch": 2.9787514847564998, + "grad_norm": 0.0027430232148617506, + "learning_rate": 8.41090686489413e-07, + "loss": 0.0783, + "num_input_tokens_seen": 11112944, + "step": 22570 + }, + { + "epoch": 2.9794113765342485, + "grad_norm": 0.03169501945376396, + "learning_rate": 8.406358592507985e-07, + "loss": 0.1547, + "num_input_tokens_seen": 11115312, + "step": 22575 + }, + { + "epoch": 2.9800712683119968, + "grad_norm": 56.20221710205078, + "learning_rate": 8.401810658360686e-07, + "loss": 0.1548, + "num_input_tokens_seen": 11117872, + "step": 22580 + }, + { + "epoch": 2.9807311600897455, + "grad_norm": 0.022906135767698288, + "learning_rate": 8.397263063417506e-07, + "loss": 0.0782, + "num_input_tokens_seen": 11120624, + "step": 22585 + }, + { + "epoch": 2.9813910518674938, + "grad_norm": 0.016489777714014053, + "learning_rate": 8.39271580864363e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11123120, + "step": 22590 + }, + { + "epoch": 2.982050943645242, + "grad_norm": 25.647974014282227, + "learning_rate": 8.388168895004189e-07, + "loss": 0.071, + "num_input_tokens_seen": 11125552, + "step": 22595 + }, + { + "epoch": 2.9827108354229908, + "grad_norm": 20.744800567626953, + "learning_rate": 8.383622323464226e-07, + "loss": 0.1256, + "num_input_tokens_seen": 11128176, + "step": 22600 + }, + { + "epoch": 2.983370727200739, + "grad_norm": 11.403419494628906, + "learning_rate": 8.379076094988718e-07, + "loss": 0.0751, + "num_input_tokens_seen": 11130480, + "step": 22605 + }, + { + "epoch": 2.9840306189784878, + "grad_norm": 0.13708731532096863, + "learning_rate": 8.374530210542575e-07, + "loss": 0.1028, + "num_input_tokens_seen": 11132848, + "step": 22610 + }, + { + "epoch": 2.984690510756236, + "grad_norm": 0.019638191908597946, + "learning_rate": 8.369984671090621e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11135152, + "step": 22615 + }, + { + "epoch": 2.9853504025339843, + "grad_norm": 320.8174133300781, + "learning_rate": 8.365439477597619e-07, + "loss": 0.019, + "num_input_tokens_seen": 11137648, + "step": 22620 + }, + { + "epoch": 2.986010294311733, + "grad_norm": 0.07475064694881439, + "learning_rate": 8.360894631028254e-07, + "loss": 0.0387, + "num_input_tokens_seen": 11139888, + "step": 22625 + }, + { + "epoch": 2.9866701860894813, + "grad_norm": 0.010483183898031712, + "learning_rate": 8.356350132347127e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11142512, + "step": 22630 + }, + { + "epoch": 2.98733007786723, + "grad_norm": 1.2646681070327759, + "learning_rate": 8.351805982518788e-07, + "loss": 0.0013, + "num_input_tokens_seen": 11144816, + "step": 22635 + }, + { + "epoch": 2.9879899696449783, + "grad_norm": 0.13110283017158508, + "learning_rate": 8.347262182507688e-07, + "loss": 0.0283, + "num_input_tokens_seen": 11147312, + "step": 22640 + }, + { + "epoch": 2.9886498614227266, + "grad_norm": 0.06564207375049591, + "learning_rate": 8.342718733278228e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11149552, + "step": 22645 + }, + { + "epoch": 2.989309753200475, + "grad_norm": 0.24977436661720276, + "learning_rate": 8.338175635794713e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11151984, + "step": 22650 + }, + { + "epoch": 2.9899696449782236, + "grad_norm": 0.02693694457411766, + "learning_rate": 8.333632891021383e-07, + "loss": 0.1013, + "num_input_tokens_seen": 11154544, + "step": 22655 + }, + { + "epoch": 2.990629536755972, + "grad_norm": 0.034543056041002274, + "learning_rate": 8.32909049992241e-07, + "loss": 0.0648, + "num_input_tokens_seen": 11157040, + "step": 22660 + }, + { + "epoch": 2.9912894285337206, + "grad_norm": 0.0038696222472935915, + "learning_rate": 8.324548463461871e-07, + "loss": 0.1119, + "num_input_tokens_seen": 11159408, + "step": 22665 + }, + { + "epoch": 2.991949320311469, + "grad_norm": 0.06301067024469376, + "learning_rate": 8.320006782603797e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11161968, + "step": 22670 + }, + { + "epoch": 2.992609212089217, + "grad_norm": 0.021865667775273323, + "learning_rate": 8.315465458312114e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11164336, + "step": 22675 + }, + { + "epoch": 2.993269103866966, + "grad_norm": 0.0017529904143884778, + "learning_rate": 8.310924491550688e-07, + "loss": 0.0551, + "num_input_tokens_seen": 11166832, + "step": 22680 + }, + { + "epoch": 2.993928995644714, + "grad_norm": 0.01234086137264967, + "learning_rate": 8.306383883283308e-07, + "loss": 0.0014, + "num_input_tokens_seen": 11168880, + "step": 22685 + }, + { + "epoch": 2.994588887422463, + "grad_norm": 0.05340511351823807, + "learning_rate": 8.301843634473683e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11171184, + "step": 22690 + }, + { + "epoch": 2.995248779200211, + "grad_norm": 0.029869886115193367, + "learning_rate": 8.297303746085452e-07, + "loss": 0.0201, + "num_input_tokens_seen": 11173680, + "step": 22695 + }, + { + "epoch": 2.9959086709779594, + "grad_norm": 171.3265380859375, + "learning_rate": 8.292764219082168e-07, + "loss": 0.0418, + "num_input_tokens_seen": 11176240, + "step": 22700 + }, + { + "epoch": 2.996568562755708, + "grad_norm": 0.5562390089035034, + "learning_rate": 8.28822505442732e-07, + "loss": 0.0009, + "num_input_tokens_seen": 11178608, + "step": 22705 + }, + { + "epoch": 2.9972284545334564, + "grad_norm": 0.10550618171691895, + "learning_rate": 8.283686253084306e-07, + "loss": 0.0041, + "num_input_tokens_seen": 11181360, + "step": 22710 + }, + { + "epoch": 2.997888346311205, + "grad_norm": 0.2801491618156433, + "learning_rate": 8.279147816016455e-07, + "loss": 0.0644, + "num_input_tokens_seen": 11183856, + "step": 22715 + }, + { + "epoch": 2.9985482380889534, + "grad_norm": 0.005332210101187229, + "learning_rate": 8.274609744187021e-07, + "loss": 0.0427, + "num_input_tokens_seen": 11186608, + "step": 22720 + }, + { + "epoch": 2.9992081298667017, + "grad_norm": 0.009198754094541073, + "learning_rate": 8.270072038559172e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11188912, + "step": 22725 + }, + { + "epoch": 2.9998680216444504, + "grad_norm": 0.028327500447630882, + "learning_rate": 8.265534700096008e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11191408, + "step": 22730 + }, + { + "epoch": 3.0005279134221987, + "grad_norm": 0.01000028196722269, + "learning_rate": 8.260997729760544e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11193728, + "step": 22735 + }, + { + "epoch": 3.001187805199947, + "grad_norm": 0.016123950481414795, + "learning_rate": 8.256461128515717e-07, + "loss": 0.0352, + "num_input_tokens_seen": 11196096, + "step": 22740 + }, + { + "epoch": 3.001187805199947, + "eval_loss": 0.14833371341228485, + "eval_runtime": 7.9341, + "eval_samples_per_second": 848.868, + "eval_steps_per_second": 106.124, + "num_input_tokens_seen": 11196096, + "step": 22740 + }, + { + "epoch": 3.0018476969776957, + "grad_norm": 0.05220545455813408, + "learning_rate": 8.251924897324392e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11198656, + "step": 22745 + }, + { + "epoch": 3.002507588755444, + "grad_norm": 0.003668104764074087, + "learning_rate": 8.247389037149346e-07, + "loss": 0.0, + "num_input_tokens_seen": 11201088, + "step": 22750 + }, + { + "epoch": 3.0031674805331927, + "grad_norm": 0.039417386054992676, + "learning_rate": 8.242853548953288e-07, + "loss": 0.0, + "num_input_tokens_seen": 11203648, + "step": 22755 + }, + { + "epoch": 3.003827372310941, + "grad_norm": 0.008168110623955727, + "learning_rate": 8.238318433698841e-07, + "loss": 0.0, + "num_input_tokens_seen": 11206400, + "step": 22760 + }, + { + "epoch": 3.0044872640886893, + "grad_norm": 0.00275533483363688, + "learning_rate": 8.233783692348546e-07, + "loss": 0.0, + "num_input_tokens_seen": 11208896, + "step": 22765 + }, + { + "epoch": 3.005147155866438, + "grad_norm": 1.2400000095367432, + "learning_rate": 8.229249325864874e-07, + "loss": 0.0016, + "num_input_tokens_seen": 11211328, + "step": 22770 + }, + { + "epoch": 3.0058070476441863, + "grad_norm": 0.014781933277845383, + "learning_rate": 8.224715335210208e-07, + "loss": 0.0581, + "num_input_tokens_seen": 11214080, + "step": 22775 + }, + { + "epoch": 3.006466939421935, + "grad_norm": 0.0018302809912711382, + "learning_rate": 8.22018172134686e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11216320, + "step": 22780 + }, + { + "epoch": 3.0071268311996833, + "grad_norm": 0.008619059808552265, + "learning_rate": 8.215648485237054e-07, + "loss": 0.0502, + "num_input_tokens_seen": 11218880, + "step": 22785 + }, + { + "epoch": 3.0077867229774315, + "grad_norm": 0.004694989416748285, + "learning_rate": 8.211115627842931e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11221312, + "step": 22790 + }, + { + "epoch": 3.0084466147551803, + "grad_norm": 0.007136012427508831, + "learning_rate": 8.206583150126564e-07, + "loss": 0.0, + "num_input_tokens_seen": 11223488, + "step": 22795 + }, + { + "epoch": 3.0091065065329285, + "grad_norm": 0.10257059335708618, + "learning_rate": 8.202051053049936e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11225728, + "step": 22800 + }, + { + "epoch": 3.009766398310677, + "grad_norm": 0.003423799527809024, + "learning_rate": 8.197519337574953e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11227904, + "step": 22805 + }, + { + "epoch": 3.0104262900884255, + "grad_norm": 0.005957749206572771, + "learning_rate": 8.192988004663442e-07, + "loss": 0.0, + "num_input_tokens_seen": 11230464, + "step": 22810 + }, + { + "epoch": 3.011086181866174, + "grad_norm": 0.011167514137923717, + "learning_rate": 8.188457055277133e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11232960, + "step": 22815 + }, + { + "epoch": 3.0117460736439225, + "grad_norm": 0.002534316387027502, + "learning_rate": 8.183926490377703e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11235456, + "step": 22820 + }, + { + "epoch": 3.012405965421671, + "grad_norm": 0.00074008823139593, + "learning_rate": 8.179396310926719e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11237888, + "step": 22825 + }, + { + "epoch": 3.013065857199419, + "grad_norm": 0.011752812191843987, + "learning_rate": 8.17486651788569e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11240576, + "step": 22830 + }, + { + "epoch": 3.013725748977168, + "grad_norm": 0.022307157516479492, + "learning_rate": 8.170337112216023e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11242752, + "step": 22835 + }, + { + "epoch": 3.014385640754916, + "grad_norm": 0.001109883887693286, + "learning_rate": 8.165808094879054e-07, + "loss": 0.0, + "num_input_tokens_seen": 11245184, + "step": 22840 + }, + { + "epoch": 3.015045532532665, + "grad_norm": 0.024239294230937958, + "learning_rate": 8.161279466836036e-07, + "loss": 0.0, + "num_input_tokens_seen": 11247552, + "step": 22845 + }, + { + "epoch": 3.015705424310413, + "grad_norm": 0.00042449618922546506, + "learning_rate": 8.156751229048132e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11249856, + "step": 22850 + }, + { + "epoch": 3.0163653160881614, + "grad_norm": 0.004139366559684277, + "learning_rate": 8.152223382476438e-07, + "loss": 0.0, + "num_input_tokens_seen": 11252608, + "step": 22855 + }, + { + "epoch": 3.01702520786591, + "grad_norm": 0.00021089478104840964, + "learning_rate": 8.14769592808195e-07, + "loss": 0.0, + "num_input_tokens_seen": 11255040, + "step": 22860 + }, + { + "epoch": 3.0176850996436584, + "grad_norm": 11.633561134338379, + "learning_rate": 8.143168866825583e-07, + "loss": 0.0565, + "num_input_tokens_seen": 11257600, + "step": 22865 + }, + { + "epoch": 3.018344991421407, + "grad_norm": 0.006936497054994106, + "learning_rate": 8.138642199668183e-07, + "loss": 0.0, + "num_input_tokens_seen": 11259904, + "step": 22870 + }, + { + "epoch": 3.0190048831991554, + "grad_norm": 0.15562273561954498, + "learning_rate": 8.134115927570493e-07, + "loss": 0.1032, + "num_input_tokens_seen": 11262272, + "step": 22875 + }, + { + "epoch": 3.0196647749769037, + "grad_norm": 0.002753417706117034, + "learning_rate": 8.129590051493189e-07, + "loss": 0.0014, + "num_input_tokens_seen": 11264512, + "step": 22880 + }, + { + "epoch": 3.0203246667546524, + "grad_norm": 19.26506233215332, + "learning_rate": 8.125064572396851e-07, + "loss": 0.0036, + "num_input_tokens_seen": 11267008, + "step": 22885 + }, + { + "epoch": 3.0209845585324007, + "grad_norm": 0.003096244530752301, + "learning_rate": 8.12053949124198e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11269568, + "step": 22890 + }, + { + "epoch": 3.021644450310149, + "grad_norm": 0.015391573309898376, + "learning_rate": 8.116014808988993e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11272192, + "step": 22895 + }, + { + "epoch": 3.0223043420878977, + "grad_norm": 0.01115776039659977, + "learning_rate": 8.111490526598217e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11275008, + "step": 22900 + }, + { + "epoch": 3.022964233865646, + "grad_norm": 0.005220245569944382, + "learning_rate": 8.106966645029905e-07, + "loss": 0.0367, + "num_input_tokens_seen": 11277376, + "step": 22905 + }, + { + "epoch": 3.0236241256433947, + "grad_norm": 14.08866024017334, + "learning_rate": 8.102443165244213e-07, + "loss": 0.0626, + "num_input_tokens_seen": 11279936, + "step": 22910 + }, + { + "epoch": 3.024284017421143, + "grad_norm": 0.002115392591804266, + "learning_rate": 8.097920088201216e-07, + "loss": 0.0, + "num_input_tokens_seen": 11282432, + "step": 22915 + }, + { + "epoch": 3.0249439091988912, + "grad_norm": 0.0018674953607842326, + "learning_rate": 8.09339741486091e-07, + "loss": 0.0323, + "num_input_tokens_seen": 11285184, + "step": 22920 + }, + { + "epoch": 3.02560380097664, + "grad_norm": 0.006753930356353521, + "learning_rate": 8.088875146183192e-07, + "loss": 0.0, + "num_input_tokens_seen": 11287744, + "step": 22925 + }, + { + "epoch": 3.0262636927543882, + "grad_norm": 0.0023846931289881468, + "learning_rate": 8.084353283127889e-07, + "loss": 0.0, + "num_input_tokens_seen": 11289984, + "step": 22930 + }, + { + "epoch": 3.026923584532137, + "grad_norm": 0.009844399988651276, + "learning_rate": 8.079831826654729e-07, + "loss": 0.0457, + "num_input_tokens_seen": 11292416, + "step": 22935 + }, + { + "epoch": 3.0275834763098852, + "grad_norm": 0.054123781621456146, + "learning_rate": 8.075310777723357e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11295104, + "step": 22940 + }, + { + "epoch": 3.0282433680876335, + "grad_norm": 0.003461797721683979, + "learning_rate": 8.070790137293338e-07, + "loss": 0.0, + "num_input_tokens_seen": 11297280, + "step": 22945 + }, + { + "epoch": 3.0289032598653822, + "grad_norm": 0.005159073509275913, + "learning_rate": 8.066269906324138e-07, + "loss": 0.0783, + "num_input_tokens_seen": 11299648, + "step": 22950 + }, + { + "epoch": 3.0295631516431305, + "grad_norm": 0.19001533091068268, + "learning_rate": 8.061750085775151e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11302080, + "step": 22955 + }, + { + "epoch": 3.030223043420879, + "grad_norm": 10.371720314025879, + "learning_rate": 8.057230676605673e-07, + "loss": 0.0538, + "num_input_tokens_seen": 11304896, + "step": 22960 + }, + { + "epoch": 3.0308829351986275, + "grad_norm": 0.008192854933440685, + "learning_rate": 8.05271167977491e-07, + "loss": 0.0, + "num_input_tokens_seen": 11307328, + "step": 22965 + }, + { + "epoch": 3.031542826976376, + "grad_norm": 0.0040962412022054195, + "learning_rate": 8.048193096241999e-07, + "loss": 0.0, + "num_input_tokens_seen": 11309440, + "step": 22970 + }, + { + "epoch": 3.0322027187541245, + "grad_norm": 0.1099163144826889, + "learning_rate": 8.043674926965962e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11311936, + "step": 22975 + }, + { + "epoch": 3.032862610531873, + "grad_norm": 1.730372667312622, + "learning_rate": 8.039157172905762e-07, + "loss": 0.0834, + "num_input_tokens_seen": 11314560, + "step": 22980 + }, + { + "epoch": 3.033522502309621, + "grad_norm": 0.10862348228693008, + "learning_rate": 8.034639835020251e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11316992, + "step": 22985 + }, + { + "epoch": 3.03418239408737, + "grad_norm": 0.0029926190618425608, + "learning_rate": 8.030122914268198e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11319616, + "step": 22990 + }, + { + "epoch": 3.034842285865118, + "grad_norm": 0.21199586987495422, + "learning_rate": 8.025606411608299e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11322112, + "step": 22995 + }, + { + "epoch": 3.035502177642867, + "grad_norm": 0.003737781662493944, + "learning_rate": 8.021090327999135e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11324480, + "step": 23000 + }, + { + "epoch": 3.036162069420615, + "grad_norm": 0.0034118664916604757, + "learning_rate": 8.016574664399225e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11327040, + "step": 23005 + }, + { + "epoch": 3.0368219611983633, + "grad_norm": 0.007330165710300207, + "learning_rate": 8.012059421766972e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11329408, + "step": 23010 + }, + { + "epoch": 3.037481852976112, + "grad_norm": 0.025925220921635628, + "learning_rate": 8.007544601060719e-07, + "loss": 0.0, + "num_input_tokens_seen": 11332032, + "step": 23015 + }, + { + "epoch": 3.0381417447538603, + "grad_norm": 0.12799876928329468, + "learning_rate": 8.003030203238694e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11334528, + "step": 23020 + }, + { + "epoch": 3.0388016365316086, + "grad_norm": 0.0025362554006278515, + "learning_rate": 7.998516229259045e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11337088, + "step": 23025 + }, + { + "epoch": 3.0394615283093573, + "grad_norm": 0.008501172065734863, + "learning_rate": 7.994002680079835e-07, + "loss": 0.0, + "num_input_tokens_seen": 11339584, + "step": 23030 + }, + { + "epoch": 3.0401214200871056, + "grad_norm": 0.00011658469156827778, + "learning_rate": 7.989489556659028e-07, + "loss": 0.0, + "num_input_tokens_seen": 11342144, + "step": 23035 + }, + { + "epoch": 3.0407813118648543, + "grad_norm": 0.0857052430510521, + "learning_rate": 7.984976859954506e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11345024, + "step": 23040 + }, + { + "epoch": 3.0414412036426026, + "grad_norm": 0.04220561310648918, + "learning_rate": 7.980464590924054e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11347456, + "step": 23045 + }, + { + "epoch": 3.042101095420351, + "grad_norm": 0.032353613525629044, + "learning_rate": 7.975952750525366e-07, + "loss": 0.02, + "num_input_tokens_seen": 11349760, + "step": 23050 + }, + { + "epoch": 3.0427609871980996, + "grad_norm": 0.0022301196586340666, + "learning_rate": 7.97144133971605e-07, + "loss": 0.0, + "num_input_tokens_seen": 11352512, + "step": 23055 + }, + { + "epoch": 3.043420878975848, + "grad_norm": 0.0006173693109303713, + "learning_rate": 7.966930359453619e-07, + "loss": 0.0, + "num_input_tokens_seen": 11354944, + "step": 23060 + }, + { + "epoch": 3.0440807707535966, + "grad_norm": 0.005107264034450054, + "learning_rate": 7.9624198106955e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11357312, + "step": 23065 + }, + { + "epoch": 3.044740662531345, + "grad_norm": 0.0043373508378863335, + "learning_rate": 7.957909694399019e-07, + "loss": 0.1689, + "num_input_tokens_seen": 11359936, + "step": 23070 + }, + { + "epoch": 3.045400554309093, + "grad_norm": 0.0003471885866019875, + "learning_rate": 7.953400011521417e-07, + "loss": 0.0, + "num_input_tokens_seen": 11362240, + "step": 23075 + }, + { + "epoch": 3.046060446086842, + "grad_norm": 0.2935470640659332, + "learning_rate": 7.948890763019845e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11364608, + "step": 23080 + }, + { + "epoch": 3.04672033786459, + "grad_norm": 0.009840859100222588, + "learning_rate": 7.944381949851353e-07, + "loss": 0.0, + "num_input_tokens_seen": 11366976, + "step": 23085 + }, + { + "epoch": 3.0473802296423385, + "grad_norm": 0.023066749796271324, + "learning_rate": 7.939873572972908e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11369408, + "step": 23090 + }, + { + "epoch": 3.048040121420087, + "grad_norm": 0.005535646341741085, + "learning_rate": 7.93536563334138e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11371904, + "step": 23095 + }, + { + "epoch": 3.0487000131978355, + "grad_norm": 0.08973933756351471, + "learning_rate": 7.930858131913541e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11374656, + "step": 23100 + }, + { + "epoch": 3.049359904975584, + "grad_norm": 0.0007211986230686307, + "learning_rate": 7.926351069646084e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11376832, + "step": 23105 + }, + { + "epoch": 3.0500197967533325, + "grad_norm": 0.0014363115187734365, + "learning_rate": 7.921844447495594e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11379264, + "step": 23110 + }, + { + "epoch": 3.0506796885310807, + "grad_norm": 0.018530454486608505, + "learning_rate": 7.917338266418573e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11381504, + "step": 23115 + }, + { + "epoch": 3.0513395803088295, + "grad_norm": 0.0006127398228272796, + "learning_rate": 7.912832527371426e-07, + "loss": 0.0006, + "num_input_tokens_seen": 11384320, + "step": 23120 + }, + { + "epoch": 3.0519994720865777, + "grad_norm": 0.023522265255451202, + "learning_rate": 7.908327231310454e-07, + "loss": 0.0, + "num_input_tokens_seen": 11386752, + "step": 23125 + }, + { + "epoch": 3.0526593638643265, + "grad_norm": 0.15402010083198547, + "learning_rate": 7.903822379191885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11389120, + "step": 23130 + }, + { + "epoch": 3.0533192556420747, + "grad_norm": 0.04021283984184265, + "learning_rate": 7.899317971971835e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11391680, + "step": 23135 + }, + { + "epoch": 3.053979147419823, + "grad_norm": 0.006357967387884855, + "learning_rate": 7.894814010606336e-07, + "loss": 0.0, + "num_input_tokens_seen": 11394176, + "step": 23140 + }, + { + "epoch": 3.0546390391975717, + "grad_norm": 0.00018174726574216038, + "learning_rate": 7.890310496051319e-07, + "loss": 0.0, + "num_input_tokens_seen": 11396480, + "step": 23145 + }, + { + "epoch": 3.05529893097532, + "grad_norm": 0.0018396085361018777, + "learning_rate": 7.885807429262616e-07, + "loss": 0.0, + "num_input_tokens_seen": 11399104, + "step": 23150 + }, + { + "epoch": 3.0559588227530683, + "grad_norm": 0.0608244314789772, + "learning_rate": 7.881304811195985e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11401664, + "step": 23155 + }, + { + "epoch": 3.056618714530817, + "grad_norm": 0.0004480116767808795, + "learning_rate": 7.876802642807056e-07, + "loss": 0.0, + "num_input_tokens_seen": 11403968, + "step": 23160 + }, + { + "epoch": 3.0572786063085653, + "grad_norm": 0.024656491354107857, + "learning_rate": 7.8723009250514e-07, + "loss": 0.0, + "num_input_tokens_seen": 11406720, + "step": 23165 + }, + { + "epoch": 3.057938498086314, + "grad_norm": 0.00041712072561495006, + "learning_rate": 7.867799658884462e-07, + "loss": 0.0054, + "num_input_tokens_seen": 11409472, + "step": 23170 + }, + { + "epoch": 3.0585983898640623, + "grad_norm": 0.0006589332479052246, + "learning_rate": 7.863298845261603e-07, + "loss": 0.0196, + "num_input_tokens_seen": 11412160, + "step": 23175 + }, + { + "epoch": 3.0592582816418106, + "grad_norm": 8.614584658062086e-05, + "learning_rate": 7.858798485138095e-07, + "loss": 0.0, + "num_input_tokens_seen": 11414528, + "step": 23180 + }, + { + "epoch": 3.0599181734195593, + "grad_norm": 0.05452264845371246, + "learning_rate": 7.854298579469099e-07, + "loss": 0.0, + "num_input_tokens_seen": 11416960, + "step": 23185 + }, + { + "epoch": 3.0605780651973076, + "grad_norm": 0.0005021971301175654, + "learning_rate": 7.849799129209697e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11419584, + "step": 23190 + }, + { + "epoch": 3.0612379569750563, + "grad_norm": 0.0011811050353571773, + "learning_rate": 7.845300135314857e-07, + "loss": 0.0, + "num_input_tokens_seen": 11422016, + "step": 23195 + }, + { + "epoch": 3.0618978487528046, + "grad_norm": 1.7594058513641357, + "learning_rate": 7.840801598739459e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11424512, + "step": 23200 + }, + { + "epoch": 3.062557740530553, + "grad_norm": 1.5043967323435936e-05, + "learning_rate": 7.836303520438288e-07, + "loss": 0.0374, + "num_input_tokens_seen": 11426944, + "step": 23205 + }, + { + "epoch": 3.0632176323083016, + "grad_norm": 7.377984002232552e-05, + "learning_rate": 7.831805901366025e-07, + "loss": 0.0549, + "num_input_tokens_seen": 11429248, + "step": 23210 + }, + { + "epoch": 3.06387752408605, + "grad_norm": 0.00022651898325420916, + "learning_rate": 7.827308742477259e-07, + "loss": 0.0, + "num_input_tokens_seen": 11431872, + "step": 23215 + }, + { + "epoch": 3.064537415863798, + "grad_norm": 0.0007423846400342882, + "learning_rate": 7.822812044726479e-07, + "loss": 0.0, + "num_input_tokens_seen": 11434368, + "step": 23220 + }, + { + "epoch": 3.065197307641547, + "grad_norm": 3.652780287666246e-05, + "learning_rate": 7.818315809068076e-07, + "loss": 0.1095, + "num_input_tokens_seen": 11436800, + "step": 23225 + }, + { + "epoch": 3.065857199419295, + "grad_norm": 0.001282864366658032, + "learning_rate": 7.813820036456344e-07, + "loss": 0.0, + "num_input_tokens_seen": 11439360, + "step": 23230 + }, + { + "epoch": 3.066517091197044, + "grad_norm": 0.0016766481567174196, + "learning_rate": 7.809324727845478e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11441728, + "step": 23235 + }, + { + "epoch": 3.067176982974792, + "grad_norm": 0.0020113263744860888, + "learning_rate": 7.804829884189576e-07, + "loss": 0.0, + "num_input_tokens_seen": 11444480, + "step": 23240 + }, + { + "epoch": 3.0678368747525404, + "grad_norm": 0.0011261154431849718, + "learning_rate": 7.800335506442635e-07, + "loss": 0.0, + "num_input_tokens_seen": 11447168, + "step": 23245 + }, + { + "epoch": 3.068496766530289, + "grad_norm": 0.0004977425560355186, + "learning_rate": 7.795841595558554e-07, + "loss": 0.0, + "num_input_tokens_seen": 11449856, + "step": 23250 + }, + { + "epoch": 3.0691566583080374, + "grad_norm": 0.0013624058337882161, + "learning_rate": 7.791348152491133e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11452224, + "step": 23255 + }, + { + "epoch": 3.069816550085786, + "grad_norm": 0.0009466035990044475, + "learning_rate": 7.78685517819407e-07, + "loss": 0.0, + "num_input_tokens_seen": 11454848, + "step": 23260 + }, + { + "epoch": 3.0704764418635344, + "grad_norm": 0.0009397098328918219, + "learning_rate": 7.782362673620972e-07, + "loss": 0.0, + "num_input_tokens_seen": 11457088, + "step": 23265 + }, + { + "epoch": 3.0711363336412827, + "grad_norm": 0.10596859455108643, + "learning_rate": 7.777870639725339e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11459520, + "step": 23270 + }, + { + "epoch": 3.0717962254190314, + "grad_norm": 0.00013747526099905372, + "learning_rate": 7.773379077460569e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11461952, + "step": 23275 + }, + { + "epoch": 3.0724561171967797, + "grad_norm": 0.9691517949104309, + "learning_rate": 7.768887987779966e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11464512, + "step": 23280 + }, + { + "epoch": 3.073116008974528, + "grad_norm": 2.7859707188326865e-05, + "learning_rate": 7.764397371636731e-07, + "loss": 0.0, + "num_input_tokens_seen": 11467008, + "step": 23285 + }, + { + "epoch": 3.0737759007522767, + "grad_norm": 0.0008346032118424773, + "learning_rate": 7.759907229983967e-07, + "loss": 0.0, + "num_input_tokens_seen": 11469120, + "step": 23290 + }, + { + "epoch": 3.074435792530025, + "grad_norm": 0.0007078782073222101, + "learning_rate": 7.755417563774673e-07, + "loss": 0.0, + "num_input_tokens_seen": 11471744, + "step": 23295 + }, + { + "epoch": 3.0750956843077737, + "grad_norm": 0.0009659160277806222, + "learning_rate": 7.75092837396174e-07, + "loss": 0.0756, + "num_input_tokens_seen": 11474112, + "step": 23300 + }, + { + "epoch": 3.075755576085522, + "grad_norm": 0.0002244488277938217, + "learning_rate": 7.746439661497981e-07, + "loss": 0.0, + "num_input_tokens_seen": 11476736, + "step": 23305 + }, + { + "epoch": 3.0764154678632702, + "grad_norm": 0.0208339411765337, + "learning_rate": 7.741951427336078e-07, + "loss": 0.0813, + "num_input_tokens_seen": 11479168, + "step": 23310 + }, + { + "epoch": 3.077075359641019, + "grad_norm": 0.0005570108769461513, + "learning_rate": 7.737463672428638e-07, + "loss": 0.0, + "num_input_tokens_seen": 11481664, + "step": 23315 + }, + { + "epoch": 3.0777352514187672, + "grad_norm": 2.2254424038692378e-05, + "learning_rate": 7.732976397728151e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11484160, + "step": 23320 + }, + { + "epoch": 3.078395143196516, + "grad_norm": 0.038763657212257385, + "learning_rate": 7.728489604187001e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11486592, + "step": 23325 + }, + { + "epoch": 3.0790550349742642, + "grad_norm": 0.009633745066821575, + "learning_rate": 7.72400329275749e-07, + "loss": 0.0, + "num_input_tokens_seen": 11489088, + "step": 23330 + }, + { + "epoch": 3.0797149267520125, + "grad_norm": 60.851131439208984, + "learning_rate": 7.719517464391791e-07, + "loss": 0.0252, + "num_input_tokens_seen": 11491392, + "step": 23335 + }, + { + "epoch": 3.0803748185297612, + "grad_norm": 2.9119068130967207e-05, + "learning_rate": 7.715032120042004e-07, + "loss": 0.0016, + "num_input_tokens_seen": 11493760, + "step": 23340 + }, + { + "epoch": 3.0810347103075095, + "grad_norm": 0.0005959281697869301, + "learning_rate": 7.710547260660096e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11496320, + "step": 23345 + }, + { + "epoch": 3.081694602085258, + "grad_norm": 0.00015468845958821476, + "learning_rate": 7.706062887197959e-07, + "loss": 0.0, + "num_input_tokens_seen": 11498688, + "step": 23350 + }, + { + "epoch": 3.0823544938630065, + "grad_norm": 5.800633516628295e-05, + "learning_rate": 7.701579000607362e-07, + "loss": 0.0, + "num_input_tokens_seen": 11501248, + "step": 23355 + }, + { + "epoch": 3.083014385640755, + "grad_norm": 1.4387388546310831e-05, + "learning_rate": 7.697095601839975e-07, + "loss": 0.0, + "num_input_tokens_seen": 11503680, + "step": 23360 + }, + { + "epoch": 3.0836742774185035, + "grad_norm": 0.009254597127437592, + "learning_rate": 7.692612691847373e-07, + "loss": 0.0, + "num_input_tokens_seen": 11506176, + "step": 23365 + }, + { + "epoch": 3.084334169196252, + "grad_norm": 14.855461120605469, + "learning_rate": 7.688130271581015e-07, + "loss": 0.0673, + "num_input_tokens_seen": 11509056, + "step": 23370 + }, + { + "epoch": 3.084994060974, + "grad_norm": 0.004579714499413967, + "learning_rate": 7.68364834199227e-07, + "loss": 0.0, + "num_input_tokens_seen": 11511296, + "step": 23375 + }, + { + "epoch": 3.085653952751749, + "grad_norm": 0.0009248594287782907, + "learning_rate": 7.679166904032389e-07, + "loss": 0.0, + "num_input_tokens_seen": 11513856, + "step": 23380 + }, + { + "epoch": 3.086313844529497, + "grad_norm": 7.695920794503763e-05, + "learning_rate": 7.674685958652525e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11516160, + "step": 23385 + }, + { + "epoch": 3.086973736307246, + "grad_norm": 0.0007654979126527905, + "learning_rate": 7.67020550680373e-07, + "loss": 0.0252, + "num_input_tokens_seen": 11518400, + "step": 23390 + }, + { + "epoch": 3.087633628084994, + "grad_norm": 0.0008947370224632323, + "learning_rate": 7.665725549436942e-07, + "loss": 0.1016, + "num_input_tokens_seen": 11521152, + "step": 23395 + }, + { + "epoch": 3.0882935198627424, + "grad_norm": 0.0005049702012911439, + "learning_rate": 7.661246087503006e-07, + "loss": 0.0427, + "num_input_tokens_seen": 11523776, + "step": 23400 + }, + { + "epoch": 3.088953411640491, + "grad_norm": 0.009187503717839718, + "learning_rate": 7.656767121952651e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11526208, + "step": 23405 + }, + { + "epoch": 3.0896133034182394, + "grad_norm": 0.0003884216712322086, + "learning_rate": 7.652288653736504e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11528704, + "step": 23410 + }, + { + "epoch": 3.0902731951959876, + "grad_norm": 0.001425333321094513, + "learning_rate": 7.647810683805091e-07, + "loss": 0.0, + "num_input_tokens_seen": 11531456, + "step": 23415 + }, + { + "epoch": 3.0909330869737364, + "grad_norm": 0.006533483508974314, + "learning_rate": 7.643333213108827e-07, + "loss": 0.117, + "num_input_tokens_seen": 11533824, + "step": 23420 + }, + { + "epoch": 3.0915929787514846, + "grad_norm": 0.008299448527395725, + "learning_rate": 7.638856242598024e-07, + "loss": 0.0, + "num_input_tokens_seen": 11536384, + "step": 23425 + }, + { + "epoch": 3.0922528705292334, + "grad_norm": 0.002385131549090147, + "learning_rate": 7.634379773222885e-07, + "loss": 0.002, + "num_input_tokens_seen": 11538944, + "step": 23430 + }, + { + "epoch": 3.0929127623069816, + "grad_norm": 0.006455971393734217, + "learning_rate": 7.629903805933506e-07, + "loss": 0.0, + "num_input_tokens_seen": 11541376, + "step": 23435 + }, + { + "epoch": 3.09357265408473, + "grad_norm": 0.0007826373912394047, + "learning_rate": 7.625428341679885e-07, + "loss": 0.0, + "num_input_tokens_seen": 11543872, + "step": 23440 + }, + { + "epoch": 3.0942325458624786, + "grad_norm": 0.0028275828808546066, + "learning_rate": 7.6209533814119e-07, + "loss": 0.0456, + "num_input_tokens_seen": 11546368, + "step": 23445 + }, + { + "epoch": 3.094892437640227, + "grad_norm": 0.0017768917605280876, + "learning_rate": 7.616478926079335e-07, + "loss": 0.0, + "num_input_tokens_seen": 11548928, + "step": 23450 + }, + { + "epoch": 3.0955523294179756, + "grad_norm": 0.00031777186086401343, + "learning_rate": 7.612004976631857e-07, + "loss": 0.0, + "num_input_tokens_seen": 11551680, + "step": 23455 + }, + { + "epoch": 3.096212221195724, + "grad_norm": 0.0034608703572303057, + "learning_rate": 7.607531534019028e-07, + "loss": 0.0, + "num_input_tokens_seen": 11554048, + "step": 23460 + }, + { + "epoch": 3.096872112973472, + "grad_norm": 0.004485815763473511, + "learning_rate": 7.60305859919031e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11556416, + "step": 23465 + }, + { + "epoch": 3.097532004751221, + "grad_norm": 0.0004141594690736383, + "learning_rate": 7.598586173095043e-07, + "loss": 0.0, + "num_input_tokens_seen": 11558912, + "step": 23470 + }, + { + "epoch": 3.098191896528969, + "grad_norm": 0.00151504622772336, + "learning_rate": 7.594114256682473e-07, + "loss": 0.0, + "num_input_tokens_seen": 11561472, + "step": 23475 + }, + { + "epoch": 3.0988517883067175, + "grad_norm": 0.001460838713683188, + "learning_rate": 7.589642850901733e-07, + "loss": 0.0719, + "num_input_tokens_seen": 11563840, + "step": 23480 + }, + { + "epoch": 3.099511680084466, + "grad_norm": 0.580944299697876, + "learning_rate": 7.585171956701837e-07, + "loss": 0.001, + "num_input_tokens_seen": 11566528, + "step": 23485 + }, + { + "epoch": 3.1001715718622145, + "grad_norm": 0.00398764293640852, + "learning_rate": 7.580701575031713e-07, + "loss": 0.0, + "num_input_tokens_seen": 11568640, + "step": 23490 + }, + { + "epoch": 3.100831463639963, + "grad_norm": 0.0029877680353820324, + "learning_rate": 7.576231706840154e-07, + "loss": 0.0009, + "num_input_tokens_seen": 11571136, + "step": 23495 + }, + { + "epoch": 3.1014913554177115, + "grad_norm": 0.0001967934367712587, + "learning_rate": 7.571762353075869e-07, + "loss": 0.0, + "num_input_tokens_seen": 11573568, + "step": 23500 + }, + { + "epoch": 3.1021512471954598, + "grad_norm": 0.0004972056485712528, + "learning_rate": 7.56729351468744e-07, + "loss": 0.0, + "num_input_tokens_seen": 11576064, + "step": 23505 + }, + { + "epoch": 3.1028111389732085, + "grad_norm": 0.011523031629621983, + "learning_rate": 7.562825192623341e-07, + "loss": 0.0722, + "num_input_tokens_seen": 11578496, + "step": 23510 + }, + { + "epoch": 3.1034710307509568, + "grad_norm": 0.0016027435194700956, + "learning_rate": 7.558357387831953e-07, + "loss": 0.0, + "num_input_tokens_seen": 11580992, + "step": 23515 + }, + { + "epoch": 3.1041309225287055, + "grad_norm": 0.0047021047212183475, + "learning_rate": 7.553890101261522e-07, + "loss": 0.0164, + "num_input_tokens_seen": 11583488, + "step": 23520 + }, + { + "epoch": 3.1047908143064538, + "grad_norm": 0.004702253267168999, + "learning_rate": 7.54942333386021e-07, + "loss": 0.0, + "num_input_tokens_seen": 11586240, + "step": 23525 + }, + { + "epoch": 3.105450706084202, + "grad_norm": 0.0065322984009981155, + "learning_rate": 7.544957086576049e-07, + "loss": 0.0, + "num_input_tokens_seen": 11589056, + "step": 23530 + }, + { + "epoch": 3.1061105978619508, + "grad_norm": 0.016910886391997337, + "learning_rate": 7.540491360356965e-07, + "loss": 0.0, + "num_input_tokens_seen": 11591296, + "step": 23535 + }, + { + "epoch": 3.106770489639699, + "grad_norm": 0.015758175402879715, + "learning_rate": 7.53602615615078e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11593856, + "step": 23540 + }, + { + "epoch": 3.1074303814174478, + "grad_norm": 0.007520774845033884, + "learning_rate": 7.5315614749052e-07, + "loss": 0.0, + "num_input_tokens_seen": 11596096, + "step": 23545 + }, + { + "epoch": 3.108090273195196, + "grad_norm": 0.0007047753897495568, + "learning_rate": 7.527097317567824e-07, + "loss": 0.0, + "num_input_tokens_seen": 11598592, + "step": 23550 + }, + { + "epoch": 3.1087501649729443, + "grad_norm": 0.000877385726198554, + "learning_rate": 7.522633685086135e-07, + "loss": 0.063, + "num_input_tokens_seen": 11601088, + "step": 23555 + }, + { + "epoch": 3.109410056750693, + "grad_norm": 0.0026626859325915575, + "learning_rate": 7.518170578407505e-07, + "loss": 0.0026, + "num_input_tokens_seen": 11603712, + "step": 23560 + }, + { + "epoch": 3.1100699485284413, + "grad_norm": 0.0020679160952568054, + "learning_rate": 7.513707998479199e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11606080, + "step": 23565 + }, + { + "epoch": 3.1107298403061896, + "grad_norm": 0.0006194873712956905, + "learning_rate": 7.509245946248363e-07, + "loss": 0.0, + "num_input_tokens_seen": 11608192, + "step": 23570 + }, + { + "epoch": 3.1113897320839383, + "grad_norm": 0.0005135766696184874, + "learning_rate": 7.504784422662042e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11610496, + "step": 23575 + }, + { + "epoch": 3.1120496238616866, + "grad_norm": 0.11313286423683167, + "learning_rate": 7.500323428667159e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11613120, + "step": 23580 + }, + { + "epoch": 3.1127095156394353, + "grad_norm": 15.523405075073242, + "learning_rate": 7.495862965210525e-07, + "loss": 0.0337, + "num_input_tokens_seen": 11615296, + "step": 23585 + }, + { + "epoch": 3.1133694074171836, + "grad_norm": 0.0008880163659341633, + "learning_rate": 7.491403033238844e-07, + "loss": 0.0008, + "num_input_tokens_seen": 11617600, + "step": 23590 + }, + { + "epoch": 3.114029299194932, + "grad_norm": 0.0013785932678729296, + "learning_rate": 7.4869436336987e-07, + "loss": 0.0323, + "num_input_tokens_seen": 11619968, + "step": 23595 + }, + { + "epoch": 3.1146891909726806, + "grad_norm": 0.007156513165682554, + "learning_rate": 7.482484767536576e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11622336, + "step": 23600 + }, + { + "epoch": 3.115349082750429, + "grad_norm": 0.03351299464702606, + "learning_rate": 7.478026435698827e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11624896, + "step": 23605 + }, + { + "epoch": 3.116008974528177, + "grad_norm": 1.2239532470703125, + "learning_rate": 7.473568639131706e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11627264, + "step": 23610 + }, + { + "epoch": 3.116668866305926, + "grad_norm": 0.24902966618537903, + "learning_rate": 7.469111378781346e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11630016, + "step": 23615 + }, + { + "epoch": 3.117328758083674, + "grad_norm": 0.0001732937671476975, + "learning_rate": 7.464654655593767e-07, + "loss": 0.1031, + "num_input_tokens_seen": 11632448, + "step": 23620 + }, + { + "epoch": 3.117988649861423, + "grad_norm": 0.002056445460766554, + "learning_rate": 7.46019847051488e-07, + "loss": 0.0, + "num_input_tokens_seen": 11634944, + "step": 23625 + }, + { + "epoch": 3.118648541639171, + "grad_norm": 0.0006751983892172575, + "learning_rate": 7.455742824490477e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11637632, + "step": 23630 + }, + { + "epoch": 3.1193084334169194, + "grad_norm": 0.0006119022727943957, + "learning_rate": 7.45128771846623e-07, + "loss": 0.0, + "num_input_tokens_seen": 11640192, + "step": 23635 + }, + { + "epoch": 3.119968325194668, + "grad_norm": 0.0014791876310482621, + "learning_rate": 7.446833153387714e-07, + "loss": 0.0, + "num_input_tokens_seen": 11642880, + "step": 23640 + }, + { + "epoch": 3.1206282169724164, + "grad_norm": 0.001501706661656499, + "learning_rate": 7.442379130200369e-07, + "loss": 0.0, + "num_input_tokens_seen": 11645184, + "step": 23645 + }, + { + "epoch": 3.121288108750165, + "grad_norm": 0.0006226670229807496, + "learning_rate": 7.437925649849534e-07, + "loss": 0.0, + "num_input_tokens_seen": 11647552, + "step": 23650 + }, + { + "epoch": 3.1219480005279134, + "grad_norm": 34.658870697021484, + "learning_rate": 7.433472713280426e-07, + "loss": 0.0613, + "num_input_tokens_seen": 11650240, + "step": 23655 + }, + { + "epoch": 3.1226078923056617, + "grad_norm": 1.7755703926086426, + "learning_rate": 7.42902032143815e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11652672, + "step": 23660 + }, + { + "epoch": 3.1232677840834104, + "grad_norm": 0.0004984392435289919, + "learning_rate": 7.424568475267697e-07, + "loss": 0.0, + "num_input_tokens_seen": 11654848, + "step": 23665 + }, + { + "epoch": 3.1239276758611587, + "grad_norm": 9.023253369377926e-05, + "learning_rate": 7.42011717571393e-07, + "loss": 0.0, + "num_input_tokens_seen": 11656896, + "step": 23670 + }, + { + "epoch": 3.1245875676389074, + "grad_norm": 0.00040223190444521606, + "learning_rate": 7.415666423721613e-07, + "loss": 0.0, + "num_input_tokens_seen": 11659264, + "step": 23675 + }, + { + "epoch": 3.1252474594166557, + "grad_norm": 0.00010142037353944033, + "learning_rate": 7.411216220235381e-07, + "loss": 0.0, + "num_input_tokens_seen": 11661760, + "step": 23680 + }, + { + "epoch": 3.125907351194404, + "grad_norm": 0.0006684021791443229, + "learning_rate": 7.406766566199762e-07, + "loss": 0.0891, + "num_input_tokens_seen": 11664000, + "step": 23685 + }, + { + "epoch": 3.1265672429721527, + "grad_norm": 0.0007115312619134784, + "learning_rate": 7.402317462559163e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11666624, + "step": 23690 + }, + { + "epoch": 3.127227134749901, + "grad_norm": 0.043818097561597824, + "learning_rate": 7.397868910257865e-07, + "loss": 0.0, + "num_input_tokens_seen": 11669376, + "step": 23695 + }, + { + "epoch": 3.1278870265276493, + "grad_norm": 0.0004242084105499089, + "learning_rate": 7.393420910240054e-07, + "loss": 0.0564, + "num_input_tokens_seen": 11672128, + "step": 23700 + }, + { + "epoch": 3.128546918305398, + "grad_norm": 0.0007881783531047404, + "learning_rate": 7.388973463449773e-07, + "loss": 0.0, + "num_input_tokens_seen": 11674496, + "step": 23705 + }, + { + "epoch": 3.1292068100831463, + "grad_norm": 0.00019011733820661902, + "learning_rate": 7.384526570830972e-07, + "loss": 0.0, + "num_input_tokens_seen": 11676992, + "step": 23710 + }, + { + "epoch": 3.129866701860895, + "grad_norm": 0.0006559406756423414, + "learning_rate": 7.380080233327466e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11679744, + "step": 23715 + }, + { + "epoch": 3.1305265936386433, + "grad_norm": 0.003428281983360648, + "learning_rate": 7.375634451882956e-07, + "loss": 0.0087, + "num_input_tokens_seen": 11682048, + "step": 23720 + }, + { + "epoch": 3.1311864854163916, + "grad_norm": 88.51753997802734, + "learning_rate": 7.371189227441031e-07, + "loss": 0.0213, + "num_input_tokens_seen": 11684608, + "step": 23725 + }, + { + "epoch": 3.1318463771941403, + "grad_norm": 0.4643154740333557, + "learning_rate": 7.366744560945155e-07, + "loss": 0.0552, + "num_input_tokens_seen": 11686976, + "step": 23730 + }, + { + "epoch": 3.1325062689718886, + "grad_norm": 0.00041846715612336993, + "learning_rate": 7.362300453338679e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11689280, + "step": 23735 + }, + { + "epoch": 3.133166160749637, + "grad_norm": 0.01871989667415619, + "learning_rate": 7.357856905564832e-07, + "loss": 0.0, + "num_input_tokens_seen": 11691776, + "step": 23740 + }, + { + "epoch": 3.1338260525273856, + "grad_norm": 0.0005692157428711653, + "learning_rate": 7.353413918566721e-07, + "loss": 0.0, + "num_input_tokens_seen": 11694080, + "step": 23745 + }, + { + "epoch": 3.134485944305134, + "grad_norm": 0.00029191409703344107, + "learning_rate": 7.348971493287342e-07, + "loss": 0.0, + "num_input_tokens_seen": 11696640, + "step": 23750 + }, + { + "epoch": 3.1351458360828826, + "grad_norm": 0.00030911393696442246, + "learning_rate": 7.344529630669565e-07, + "loss": 0.0239, + "num_input_tokens_seen": 11699136, + "step": 23755 + }, + { + "epoch": 3.135805727860631, + "grad_norm": 0.0006664734100922942, + "learning_rate": 7.340088331656147e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11701632, + "step": 23760 + }, + { + "epoch": 3.136465619638379, + "grad_norm": 0.0018933032406494021, + "learning_rate": 7.33564759718972e-07, + "loss": 0.0322, + "num_input_tokens_seen": 11704000, + "step": 23765 + }, + { + "epoch": 3.137125511416128, + "grad_norm": 0.0021034155506640673, + "learning_rate": 7.331207428212792e-07, + "loss": 0.0472, + "num_input_tokens_seen": 11706624, + "step": 23770 + }, + { + "epoch": 3.137785403193876, + "grad_norm": 1.0103815793991089, + "learning_rate": 7.326767825667766e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11708864, + "step": 23775 + }, + { + "epoch": 3.138445294971625, + "grad_norm": 0.0008806603727862239, + "learning_rate": 7.322328790496908e-07, + "loss": 0.0215, + "num_input_tokens_seen": 11711296, + "step": 23780 + }, + { + "epoch": 3.139105186749373, + "grad_norm": 0.13981249928474426, + "learning_rate": 7.317890323642375e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11713600, + "step": 23785 + }, + { + "epoch": 3.1397650785271214, + "grad_norm": 0.00162605254445225, + "learning_rate": 7.3134524260462e-07, + "loss": 0.0, + "num_input_tokens_seen": 11716032, + "step": 23790 + }, + { + "epoch": 3.14042497030487, + "grad_norm": 0.0005970995989628136, + "learning_rate": 7.30901509865029e-07, + "loss": 0.0, + "num_input_tokens_seen": 11718592, + "step": 23795 + }, + { + "epoch": 3.1410848620826184, + "grad_norm": 0.008737782947719097, + "learning_rate": 7.304578342396441e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11721280, + "step": 23800 + }, + { + "epoch": 3.141744753860367, + "grad_norm": 0.0008488676394335926, + "learning_rate": 7.300142158226319e-07, + "loss": 0.0014, + "num_input_tokens_seen": 11723904, + "step": 23805 + }, + { + "epoch": 3.1424046456381154, + "grad_norm": 0.001676439307630062, + "learning_rate": 7.295706547081475e-07, + "loss": 0.0, + "num_input_tokens_seen": 11726336, + "step": 23810 + }, + { + "epoch": 3.1430645374158637, + "grad_norm": 0.0002265098737552762, + "learning_rate": 7.291271509903334e-07, + "loss": 0.0, + "num_input_tokens_seen": 11728640, + "step": 23815 + }, + { + "epoch": 3.1437244291936124, + "grad_norm": 0.6636281609535217, + "learning_rate": 7.286837047633195e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11731072, + "step": 23820 + }, + { + "epoch": 3.1443843209713607, + "grad_norm": 0.0003701484529301524, + "learning_rate": 7.282403161212251e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11733568, + "step": 23825 + }, + { + "epoch": 3.145044212749109, + "grad_norm": 17.29320526123047, + "learning_rate": 7.277969851581551e-07, + "loss": 0.1047, + "num_input_tokens_seen": 11736064, + "step": 23830 + }, + { + "epoch": 3.1457041045268577, + "grad_norm": 0.0005500232218764722, + "learning_rate": 7.273537119682045e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11738432, + "step": 23835 + }, + { + "epoch": 3.146363996304606, + "grad_norm": 0.0012078273575752974, + "learning_rate": 7.26910496645454e-07, + "loss": 0.0, + "num_input_tokens_seen": 11741120, + "step": 23840 + }, + { + "epoch": 3.1470238880823547, + "grad_norm": 0.00023791982675902545, + "learning_rate": 7.264673392839726e-07, + "loss": 0.0, + "num_input_tokens_seen": 11743296, + "step": 23845 + }, + { + "epoch": 3.147683779860103, + "grad_norm": 0.010573324747383595, + "learning_rate": 7.260242399778183e-07, + "loss": 0.0411, + "num_input_tokens_seen": 11745792, + "step": 23850 + }, + { + "epoch": 3.1483436716378512, + "grad_norm": 0.0007384056807495654, + "learning_rate": 7.255811988210343e-07, + "loss": 0.0252, + "num_input_tokens_seen": 11747968, + "step": 23855 + }, + { + "epoch": 3.1490035634156, + "grad_norm": 29.7474422454834, + "learning_rate": 7.251382159076544e-07, + "loss": 0.0896, + "num_input_tokens_seen": 11750144, + "step": 23860 + }, + { + "epoch": 3.1496634551933482, + "grad_norm": 0.001195336226373911, + "learning_rate": 7.246952913316977e-07, + "loss": 0.1151, + "num_input_tokens_seen": 11752704, + "step": 23865 + }, + { + "epoch": 3.1503233469710965, + "grad_norm": 0.0011512299533933401, + "learning_rate": 7.242524251871714e-07, + "loss": 0.0766, + "num_input_tokens_seen": 11755072, + "step": 23870 + }, + { + "epoch": 3.1509832387488452, + "grad_norm": 0.0014371996512636542, + "learning_rate": 7.238096175680714e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11757504, + "step": 23875 + }, + { + "epoch": 3.1516431305265935, + "grad_norm": 0.010653997771441936, + "learning_rate": 7.233668685683798e-07, + "loss": 0.0004, + "num_input_tokens_seen": 11759744, + "step": 23880 + }, + { + "epoch": 3.1523030223043422, + "grad_norm": 0.00024848911562003195, + "learning_rate": 7.229241782820673e-07, + "loss": 0.0907, + "num_input_tokens_seen": 11762176, + "step": 23885 + }, + { + "epoch": 3.1529629140820905, + "grad_norm": 0.004329715855419636, + "learning_rate": 7.224815468030916e-07, + "loss": 0.0, + "num_input_tokens_seen": 11764672, + "step": 23890 + }, + { + "epoch": 3.153622805859839, + "grad_norm": 0.012807437218725681, + "learning_rate": 7.220389742253978e-07, + "loss": 0.0, + "num_input_tokens_seen": 11767168, + "step": 23895 + }, + { + "epoch": 3.1542826976375875, + "grad_norm": 13.427555084228516, + "learning_rate": 7.21596460642919e-07, + "loss": 0.0025, + "num_input_tokens_seen": 11769664, + "step": 23900 + }, + { + "epoch": 3.154942589415336, + "grad_norm": 0.00018926402844954282, + "learning_rate": 7.211540061495751e-07, + "loss": 0.0, + "num_input_tokens_seen": 11772352, + "step": 23905 + }, + { + "epoch": 3.1556024811930845, + "grad_norm": 0.017940253019332886, + "learning_rate": 7.207116108392746e-07, + "loss": 0.0087, + "num_input_tokens_seen": 11774656, + "step": 23910 + }, + { + "epoch": 3.156262372970833, + "grad_norm": 0.12874840199947357, + "learning_rate": 7.202692748059121e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11776960, + "step": 23915 + }, + { + "epoch": 3.156922264748581, + "grad_norm": 0.012035480700433254, + "learning_rate": 7.1982699814337e-07, + "loss": 0.061, + "num_input_tokens_seen": 11779456, + "step": 23920 + }, + { + "epoch": 3.15758215652633, + "grad_norm": 0.020318351686000824, + "learning_rate": 7.193847809455192e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11782016, + "step": 23925 + }, + { + "epoch": 3.158242048304078, + "grad_norm": 0.00042746050166897476, + "learning_rate": 7.189426233062161e-07, + "loss": 0.0, + "num_input_tokens_seen": 11784448, + "step": 23930 + }, + { + "epoch": 3.158901940081827, + "grad_norm": 0.004594277124851942, + "learning_rate": 7.185005253193064e-07, + "loss": 0.0, + "num_input_tokens_seen": 11786816, + "step": 23935 + }, + { + "epoch": 3.159561831859575, + "grad_norm": 0.016789058223366737, + "learning_rate": 7.180584870786217e-07, + "loss": 0.0299, + "num_input_tokens_seen": 11789120, + "step": 23940 + }, + { + "epoch": 3.1602217236373233, + "grad_norm": 0.004520804155617952, + "learning_rate": 7.17616508677981e-07, + "loss": 0.0052, + "num_input_tokens_seen": 11791872, + "step": 23945 + }, + { + "epoch": 3.160881615415072, + "grad_norm": 0.0033924004528671503, + "learning_rate": 7.171745902111919e-07, + "loss": 0.0961, + "num_input_tokens_seen": 11794560, + "step": 23950 + }, + { + "epoch": 3.1615415071928203, + "grad_norm": 0.004584446549415588, + "learning_rate": 7.167327317720479e-07, + "loss": 0.0, + "num_input_tokens_seen": 11797120, + "step": 23955 + }, + { + "epoch": 3.1622013989705686, + "grad_norm": 0.08727633208036423, + "learning_rate": 7.162909334543303e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11799680, + "step": 23960 + }, + { + "epoch": 3.1628612907483173, + "grad_norm": 0.011404680088162422, + "learning_rate": 7.158491953518079e-07, + "loss": 0.0, + "num_input_tokens_seen": 11802048, + "step": 23965 + }, + { + "epoch": 3.1635211825260656, + "grad_norm": 72.21094512939453, + "learning_rate": 7.154075175582355e-07, + "loss": 0.0431, + "num_input_tokens_seen": 11804544, + "step": 23970 + }, + { + "epoch": 3.1641810743038143, + "grad_norm": 0.002588431118056178, + "learning_rate": 7.149659001673572e-07, + "loss": 0.0, + "num_input_tokens_seen": 11806976, + "step": 23975 + }, + { + "epoch": 3.1648409660815626, + "grad_norm": 0.0015010889619588852, + "learning_rate": 7.14524343272902e-07, + "loss": 0.0021, + "num_input_tokens_seen": 11809344, + "step": 23980 + }, + { + "epoch": 3.165500857859311, + "grad_norm": 0.0020076995715498924, + "learning_rate": 7.14082846968588e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11811840, + "step": 23985 + }, + { + "epoch": 3.1661607496370596, + "grad_norm": 0.0015450094360858202, + "learning_rate": 7.136414113481191e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11814208, + "step": 23990 + }, + { + "epoch": 3.166820641414808, + "grad_norm": 0.0014535001246258616, + "learning_rate": 7.132000365051873e-07, + "loss": 0.0, + "num_input_tokens_seen": 11816768, + "step": 23995 + }, + { + "epoch": 3.1674805331925566, + "grad_norm": 0.005266868509352207, + "learning_rate": 7.127587225334712e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11819456, + "step": 24000 + }, + { + "epoch": 3.168140424970305, + "grad_norm": 0.007542007602751255, + "learning_rate": 7.123174695266354e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11821760, + "step": 24005 + }, + { + "epoch": 3.168800316748053, + "grad_norm": 0.012107683345675468, + "learning_rate": 7.11876277578334e-07, + "loss": 0.0396, + "num_input_tokens_seen": 11824192, + "step": 24010 + }, + { + "epoch": 3.169460208525802, + "grad_norm": 0.0022753942757844925, + "learning_rate": 7.114351467822058e-07, + "loss": 0.0, + "num_input_tokens_seen": 11826688, + "step": 24015 + }, + { + "epoch": 3.17012010030355, + "grad_norm": 0.0035900247748941183, + "learning_rate": 7.109940772318787e-07, + "loss": 0.0, + "num_input_tokens_seen": 11828864, + "step": 24020 + }, + { + "epoch": 3.1707799920812985, + "grad_norm": 0.0012713761534541845, + "learning_rate": 7.105530690209656e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11831168, + "step": 24025 + }, + { + "epoch": 3.171439883859047, + "grad_norm": 0.021307138726115227, + "learning_rate": 7.101121222430675e-07, + "loss": 0.0626, + "num_input_tokens_seen": 11834176, + "step": 24030 + }, + { + "epoch": 3.1720997756367955, + "grad_norm": 0.01302417553961277, + "learning_rate": 7.096712369917724e-07, + "loss": 0.0, + "num_input_tokens_seen": 11836288, + "step": 24035 + }, + { + "epoch": 3.172759667414544, + "grad_norm": 0.0022673753555864096, + "learning_rate": 7.092304133606544e-07, + "loss": 0.0008, + "num_input_tokens_seen": 11839040, + "step": 24040 + }, + { + "epoch": 3.1734195591922925, + "grad_norm": 0.0007913524750620127, + "learning_rate": 7.087896514432762e-07, + "loss": 0.0, + "num_input_tokens_seen": 11841280, + "step": 24045 + }, + { + "epoch": 3.1740794509700407, + "grad_norm": 12.834890365600586, + "learning_rate": 7.083489513331855e-07, + "loss": 0.0511, + "num_input_tokens_seen": 11843904, + "step": 24050 + }, + { + "epoch": 3.1747393427477895, + "grad_norm": 72.53605651855469, + "learning_rate": 7.079083131239177e-07, + "loss": 0.0128, + "num_input_tokens_seen": 11846336, + "step": 24055 + }, + { + "epoch": 3.1753992345255377, + "grad_norm": 0.008593321777880192, + "learning_rate": 7.074677369089955e-07, + "loss": 0.0008, + "num_input_tokens_seen": 11848576, + "step": 24060 + }, + { + "epoch": 3.1760591263032865, + "grad_norm": 0.0005958180991001427, + "learning_rate": 7.070272227819276e-07, + "loss": 0.0009, + "num_input_tokens_seen": 11850688, + "step": 24065 + }, + { + "epoch": 3.1767190180810347, + "grad_norm": 0.005373831372708082, + "learning_rate": 7.065867708362103e-07, + "loss": 0.0, + "num_input_tokens_seen": 11852992, + "step": 24070 + }, + { + "epoch": 3.177378909858783, + "grad_norm": 0.12464771419763565, + "learning_rate": 7.061463811653261e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11855744, + "step": 24075 + }, + { + "epoch": 3.1780388016365317, + "grad_norm": 0.011009292677044868, + "learning_rate": 7.057060538627445e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11858112, + "step": 24080 + }, + { + "epoch": 3.17869869341428, + "grad_norm": 10.30748462677002, + "learning_rate": 7.05265789021922e-07, + "loss": 0.0366, + "num_input_tokens_seen": 11860096, + "step": 24085 + }, + { + "epoch": 3.1793585851920287, + "grad_norm": 0.0007246877066791058, + "learning_rate": 7.048255867363014e-07, + "loss": 0.0, + "num_input_tokens_seen": 11862720, + "step": 24090 + }, + { + "epoch": 3.180018476969777, + "grad_norm": 0.01777108572423458, + "learning_rate": 7.043854470993125e-07, + "loss": 0.0682, + "num_input_tokens_seen": 11865088, + "step": 24095 + }, + { + "epoch": 3.1806783687475253, + "grad_norm": 0.933526337146759, + "learning_rate": 7.039453702043719e-07, + "loss": 0.1339, + "num_input_tokens_seen": 11867712, + "step": 24100 + }, + { + "epoch": 3.181338260525274, + "grad_norm": 0.1766197234392166, + "learning_rate": 7.035053561448825e-07, + "loss": 0.0034, + "num_input_tokens_seen": 11870272, + "step": 24105 + }, + { + "epoch": 3.1819981523030223, + "grad_norm": 0.001022300566546619, + "learning_rate": 7.030654050142341e-07, + "loss": 0.0, + "num_input_tokens_seen": 11872896, + "step": 24110 + }, + { + "epoch": 3.1826580440807706, + "grad_norm": 0.004060187842696905, + "learning_rate": 7.026255169058035e-07, + "loss": 0.0706, + "num_input_tokens_seen": 11875392, + "step": 24115 + }, + { + "epoch": 3.1833179358585193, + "grad_norm": 0.016995996236801147, + "learning_rate": 7.021856919129534e-07, + "loss": 0.0114, + "num_input_tokens_seen": 11877696, + "step": 24120 + }, + { + "epoch": 3.1839778276362676, + "grad_norm": 0.020419931039214134, + "learning_rate": 7.017459301290337e-07, + "loss": 0.0308, + "num_input_tokens_seen": 11880384, + "step": 24125 + }, + { + "epoch": 3.1846377194140163, + "grad_norm": 0.0050578368827700615, + "learning_rate": 7.013062316473803e-07, + "loss": 0.0813, + "num_input_tokens_seen": 11882944, + "step": 24130 + }, + { + "epoch": 3.1852976111917646, + "grad_norm": 0.0038029979914426804, + "learning_rate": 7.008665965613165e-07, + "loss": 0.0, + "num_input_tokens_seen": 11885440, + "step": 24135 + }, + { + "epoch": 3.185957502969513, + "grad_norm": 0.07404931634664536, + "learning_rate": 7.004270249641513e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11887680, + "step": 24140 + }, + { + "epoch": 3.1866173947472616, + "grad_norm": 0.0009840900311246514, + "learning_rate": 6.999875169491808e-07, + "loss": 0.0009, + "num_input_tokens_seen": 11889984, + "step": 24145 + }, + { + "epoch": 3.18727728652501, + "grad_norm": 0.02250547893345356, + "learning_rate": 6.995480726096875e-07, + "loss": 0.0, + "num_input_tokens_seen": 11892224, + "step": 24150 + }, + { + "epoch": 3.187937178302758, + "grad_norm": 40.72771453857422, + "learning_rate": 6.991086920389395e-07, + "loss": 0.0441, + "num_input_tokens_seen": 11894656, + "step": 24155 + }, + { + "epoch": 3.188597070080507, + "grad_norm": 0.051156964153051376, + "learning_rate": 6.986693753301934e-07, + "loss": 0.1136, + "num_input_tokens_seen": 11897216, + "step": 24160 + }, + { + "epoch": 3.189256961858255, + "grad_norm": 0.049732062965631485, + "learning_rate": 6.982301225766897e-07, + "loss": 0.0016, + "num_input_tokens_seen": 11899712, + "step": 24165 + }, + { + "epoch": 3.189916853636004, + "grad_norm": 0.003781419014558196, + "learning_rate": 6.977909338716578e-07, + "loss": 0.0239, + "num_input_tokens_seen": 11902144, + "step": 24170 + }, + { + "epoch": 3.190576745413752, + "grad_norm": 0.00045295763993635774, + "learning_rate": 6.973518093083116e-07, + "loss": 0.099, + "num_input_tokens_seen": 11904640, + "step": 24175 + }, + { + "epoch": 3.1912366371915004, + "grad_norm": 0.0009195672464556992, + "learning_rate": 6.969127489798519e-07, + "loss": 0.0008, + "num_input_tokens_seen": 11907136, + "step": 24180 + }, + { + "epoch": 3.191896528969249, + "grad_norm": 0.0020728004164993763, + "learning_rate": 6.964737529794669e-07, + "loss": 0.0013, + "num_input_tokens_seen": 11909696, + "step": 24185 + }, + { + "epoch": 3.1925564207469974, + "grad_norm": 0.03599919006228447, + "learning_rate": 6.960348214003294e-07, + "loss": 0.0, + "num_input_tokens_seen": 11912064, + "step": 24190 + }, + { + "epoch": 3.193216312524746, + "grad_norm": 0.0029828757978975773, + "learning_rate": 6.955959543356005e-07, + "loss": 0.0, + "num_input_tokens_seen": 11914368, + "step": 24195 + }, + { + "epoch": 3.1938762043024944, + "grad_norm": 0.0022346843034029007, + "learning_rate": 6.951571518784257e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11916736, + "step": 24200 + }, + { + "epoch": 3.1945360960802427, + "grad_norm": 0.010649897158145905, + "learning_rate": 6.947184141219378e-07, + "loss": 0.0, + "num_input_tokens_seen": 11918912, + "step": 24205 + }, + { + "epoch": 3.1951959878579914, + "grad_norm": 0.000551412464119494, + "learning_rate": 6.94279741159256e-07, + "loss": 0.0, + "num_input_tokens_seen": 11921152, + "step": 24210 + }, + { + "epoch": 3.1958558796357397, + "grad_norm": 0.0012428689515218139, + "learning_rate": 6.93841133083485e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11923392, + "step": 24215 + }, + { + "epoch": 3.1965157714134884, + "grad_norm": 24.994014739990234, + "learning_rate": 6.934025899877167e-07, + "loss": 0.0738, + "num_input_tokens_seen": 11925952, + "step": 24220 + }, + { + "epoch": 3.1971756631912367, + "grad_norm": 0.005721225868910551, + "learning_rate": 6.929641119650286e-07, + "loss": 0.0, + "num_input_tokens_seen": 11928576, + "step": 24225 + }, + { + "epoch": 3.197835554968985, + "grad_norm": 0.07588429003953934, + "learning_rate": 6.92525699108484e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11930880, + "step": 24230 + }, + { + "epoch": 3.1984954467467337, + "grad_norm": 0.21324551105499268, + "learning_rate": 6.920873515111336e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11933312, + "step": 24235 + }, + { + "epoch": 3.199155338524482, + "grad_norm": 0.000538547697942704, + "learning_rate": 6.916490692660127e-07, + "loss": 0.0142, + "num_input_tokens_seen": 11935936, + "step": 24240 + }, + { + "epoch": 3.1998152303022303, + "grad_norm": 0.0019838111475110054, + "learning_rate": 6.912108524661443e-07, + "loss": 0.043, + "num_input_tokens_seen": 11938048, + "step": 24245 + }, + { + "epoch": 3.200475122079979, + "grad_norm": 0.0037101011257618666, + "learning_rate": 6.907727012045363e-07, + "loss": 0.0, + "num_input_tokens_seen": 11940480, + "step": 24250 + }, + { + "epoch": 3.2011350138577273, + "grad_norm": 0.0017553389770910144, + "learning_rate": 6.903346155741831e-07, + "loss": 0.0372, + "num_input_tokens_seen": 11942848, + "step": 24255 + }, + { + "epoch": 3.201794905635476, + "grad_norm": 0.008613619953393936, + "learning_rate": 6.898965956680655e-07, + "loss": 0.0, + "num_input_tokens_seen": 11945280, + "step": 24260 + }, + { + "epoch": 3.2024547974132243, + "grad_norm": 0.029970765113830566, + "learning_rate": 6.894586415791497e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11948032, + "step": 24265 + }, + { + "epoch": 3.2031146891909725, + "grad_norm": 0.0053716376423835754, + "learning_rate": 6.890207534003884e-07, + "loss": 0.0007, + "num_input_tokens_seen": 11950464, + "step": 24270 + }, + { + "epoch": 3.2037745809687213, + "grad_norm": 0.0050010415725409985, + "learning_rate": 6.885829312247207e-07, + "loss": 0.0, + "num_input_tokens_seen": 11953216, + "step": 24275 + }, + { + "epoch": 3.2044344727464695, + "grad_norm": 0.006434239447116852, + "learning_rate": 6.881451751450702e-07, + "loss": 0.0, + "num_input_tokens_seen": 11955520, + "step": 24280 + }, + { + "epoch": 3.205094364524218, + "grad_norm": 0.0001354543346678838, + "learning_rate": 6.877074852543483e-07, + "loss": 0.0308, + "num_input_tokens_seen": 11958016, + "step": 24285 + }, + { + "epoch": 3.2057542563019665, + "grad_norm": 0.00035205119638703763, + "learning_rate": 6.872698616454511e-07, + "loss": 0.0, + "num_input_tokens_seen": 11960512, + "step": 24290 + }, + { + "epoch": 3.206414148079715, + "grad_norm": 0.23281724750995636, + "learning_rate": 6.868323044112612e-07, + "loss": 0.0282, + "num_input_tokens_seen": 11962944, + "step": 24295 + }, + { + "epoch": 3.2070740398574635, + "grad_norm": 0.0029322770424187183, + "learning_rate": 6.863948136446468e-07, + "loss": 0.0, + "num_input_tokens_seen": 11965632, + "step": 24300 + }, + { + "epoch": 3.207733931635212, + "grad_norm": 0.00021965963242109865, + "learning_rate": 6.859573894384625e-07, + "loss": 0.075, + "num_input_tokens_seen": 11967872, + "step": 24305 + }, + { + "epoch": 3.20839382341296, + "grad_norm": 0.0003221237566322088, + "learning_rate": 6.855200318855483e-07, + "loss": 0.0236, + "num_input_tokens_seen": 11970048, + "step": 24310 + }, + { + "epoch": 3.209053715190709, + "grad_norm": 0.2113918662071228, + "learning_rate": 6.850827410787295e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11972352, + "step": 24315 + }, + { + "epoch": 3.209713606968457, + "grad_norm": 0.05100065469741821, + "learning_rate": 6.846455171108187e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11974784, + "step": 24320 + }, + { + "epoch": 3.210373498746206, + "grad_norm": 0.011089351028203964, + "learning_rate": 6.842083600746131e-07, + "loss": 0.0, + "num_input_tokens_seen": 11977344, + "step": 24325 + }, + { + "epoch": 3.211033390523954, + "grad_norm": 0.0005579411517828703, + "learning_rate": 6.837712700628967e-07, + "loss": 0.0, + "num_input_tokens_seen": 11979904, + "step": 24330 + }, + { + "epoch": 3.2116932823017024, + "grad_norm": 0.0014182161539793015, + "learning_rate": 6.833342471684383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11982208, + "step": 24335 + }, + { + "epoch": 3.212353174079451, + "grad_norm": 0.0018914632964879274, + "learning_rate": 6.828972914839924e-07, + "loss": 0.0238, + "num_input_tokens_seen": 11984832, + "step": 24340 + }, + { + "epoch": 3.2130130658571994, + "grad_norm": 0.002741429954767227, + "learning_rate": 6.824604031023005e-07, + "loss": 0.0863, + "num_input_tokens_seen": 11987392, + "step": 24345 + }, + { + "epoch": 3.213672957634948, + "grad_norm": 0.5368107557296753, + "learning_rate": 6.820235821160881e-07, + "loss": 0.0002, + "num_input_tokens_seen": 11989632, + "step": 24350 + }, + { + "epoch": 3.2143328494126964, + "grad_norm": 0.01114723365753889, + "learning_rate": 6.815868286180683e-07, + "loss": 0.0001, + "num_input_tokens_seen": 11992064, + "step": 24355 + }, + { + "epoch": 3.2149927411904446, + "grad_norm": 0.052227143198251724, + "learning_rate": 6.811501427009383e-07, + "loss": 0.001, + "num_input_tokens_seen": 11994688, + "step": 24360 + }, + { + "epoch": 3.2156526329681934, + "grad_norm": 0.002823041984811425, + "learning_rate": 6.807135244573814e-07, + "loss": 0.0003, + "num_input_tokens_seen": 11997120, + "step": 24365 + }, + { + "epoch": 3.2163125247459416, + "grad_norm": 0.014946339651942253, + "learning_rate": 6.802769739800669e-07, + "loss": 0.0005, + "num_input_tokens_seen": 11999616, + "step": 24370 + }, + { + "epoch": 3.21697241652369, + "grad_norm": 0.008161872625350952, + "learning_rate": 6.798404913616491e-07, + "loss": 0.0, + "num_input_tokens_seen": 12002176, + "step": 24375 + }, + { + "epoch": 3.2176323083014386, + "grad_norm": 0.0014490735484287143, + "learning_rate": 6.794040766947693e-07, + "loss": 0.0487, + "num_input_tokens_seen": 12004608, + "step": 24380 + }, + { + "epoch": 3.218292200079187, + "grad_norm": 0.0007983733667060733, + "learning_rate": 6.789677300720522e-07, + "loss": 0.0, + "num_input_tokens_seen": 12007104, + "step": 24385 + }, + { + "epoch": 3.2189520918569356, + "grad_norm": 0.20858755707740784, + "learning_rate": 6.785314515861096e-07, + "loss": 0.0, + "num_input_tokens_seen": 12009664, + "step": 24390 + }, + { + "epoch": 3.219611983634684, + "grad_norm": 0.012098542414605618, + "learning_rate": 6.780952413295387e-07, + "loss": 0.0, + "num_input_tokens_seen": 12012032, + "step": 24395 + }, + { + "epoch": 3.220271875412432, + "grad_norm": 0.019903438165783882, + "learning_rate": 6.776590993949217e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12014208, + "step": 24400 + }, + { + "epoch": 3.220931767190181, + "grad_norm": 0.5107465982437134, + "learning_rate": 6.772230258748266e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12016576, + "step": 24405 + }, + { + "epoch": 3.221591658967929, + "grad_norm": 2.904726505279541, + "learning_rate": 6.767870208618071e-07, + "loss": 0.0006, + "num_input_tokens_seen": 12019264, + "step": 24410 + }, + { + "epoch": 3.2222515507456775, + "grad_norm": 5.2059836889384314e-05, + "learning_rate": 6.763510844484015e-07, + "loss": 0.0213, + "num_input_tokens_seen": 12021632, + "step": 24415 + }, + { + "epoch": 3.222911442523426, + "grad_norm": 0.01229359395802021, + "learning_rate": 6.759152167271349e-07, + "loss": 0.0283, + "num_input_tokens_seen": 12024000, + "step": 24420 + }, + { + "epoch": 3.2235713343011745, + "grad_norm": 0.0005444117123261094, + "learning_rate": 6.754794177905165e-07, + "loss": 0.0, + "num_input_tokens_seen": 12026432, + "step": 24425 + }, + { + "epoch": 3.224231226078923, + "grad_norm": 0.0019035936566069722, + "learning_rate": 6.750436877310418e-07, + "loss": 0.0, + "num_input_tokens_seen": 12028672, + "step": 24430 + }, + { + "epoch": 3.2248911178566715, + "grad_norm": 0.3872932195663452, + "learning_rate": 6.746080266411913e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12031488, + "step": 24435 + }, + { + "epoch": 3.2255510096344198, + "grad_norm": 0.12454172223806381, + "learning_rate": 6.741724346134306e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12033920, + "step": 24440 + }, + { + "epoch": 3.2262109014121685, + "grad_norm": 0.00664086127653718, + "learning_rate": 6.737369117402114e-07, + "loss": 0.0044, + "num_input_tokens_seen": 12036224, + "step": 24445 + }, + { + "epoch": 3.2268707931899168, + "grad_norm": 11.390558242797852, + "learning_rate": 6.733014581139699e-07, + "loss": 0.115, + "num_input_tokens_seen": 12038528, + "step": 24450 + }, + { + "epoch": 3.2275306849676655, + "grad_norm": 0.000371633970644325, + "learning_rate": 6.728660738271283e-07, + "loss": 0.0, + "num_input_tokens_seen": 12040896, + "step": 24455 + }, + { + "epoch": 3.2281905767454138, + "grad_norm": 0.0016459020553156734, + "learning_rate": 6.724307589720936e-07, + "loss": 0.0, + "num_input_tokens_seen": 12043008, + "step": 24460 + }, + { + "epoch": 3.228850468523162, + "grad_norm": 0.9386215806007385, + "learning_rate": 6.719955136412582e-07, + "loss": 0.0023, + "num_input_tokens_seen": 12045504, + "step": 24465 + }, + { + "epoch": 3.2295103603009108, + "grad_norm": 0.00015591199917253107, + "learning_rate": 6.715603379269998e-07, + "loss": 0.0, + "num_input_tokens_seen": 12047808, + "step": 24470 + }, + { + "epoch": 3.230170252078659, + "grad_norm": 0.0026150809135288, + "learning_rate": 6.711252319216814e-07, + "loss": 0.0338, + "num_input_tokens_seen": 12050496, + "step": 24475 + }, + { + "epoch": 3.2308301438564078, + "grad_norm": 0.04011273384094238, + "learning_rate": 6.70690195717651e-07, + "loss": 0.0, + "num_input_tokens_seen": 12052864, + "step": 24480 + }, + { + "epoch": 3.231490035634156, + "grad_norm": 0.0008673505508340895, + "learning_rate": 6.70255229407242e-07, + "loss": 0.0, + "num_input_tokens_seen": 12055040, + "step": 24485 + }, + { + "epoch": 3.2321499274119043, + "grad_norm": 0.00220724125392735, + "learning_rate": 6.698203330827722e-07, + "loss": 0.0, + "num_input_tokens_seen": 12057664, + "step": 24490 + }, + { + "epoch": 3.232809819189653, + "grad_norm": 0.0067215790040791035, + "learning_rate": 6.693855068365464e-07, + "loss": 0.1253, + "num_input_tokens_seen": 12059776, + "step": 24495 + }, + { + "epoch": 3.2334697109674013, + "grad_norm": 0.009078079834580421, + "learning_rate": 6.689507507608518e-07, + "loss": 0.0, + "num_input_tokens_seen": 12062336, + "step": 24500 + }, + { + "epoch": 3.2341296027451496, + "grad_norm": 0.0011320833582431078, + "learning_rate": 6.685160649479638e-07, + "loss": 0.0, + "num_input_tokens_seen": 12064512, + "step": 24505 + }, + { + "epoch": 3.2347894945228983, + "grad_norm": 6.545926589751616e-05, + "learning_rate": 6.680814494901406e-07, + "loss": 0.0, + "num_input_tokens_seen": 12067072, + "step": 24510 + }, + { + "epoch": 3.2354493863006466, + "grad_norm": 0.10730508714914322, + "learning_rate": 6.676469044796258e-07, + "loss": 0.0, + "num_input_tokens_seen": 12069376, + "step": 24515 + }, + { + "epoch": 3.2361092780783953, + "grad_norm": 0.005647291895002127, + "learning_rate": 6.672124300086492e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12071872, + "step": 24520 + }, + { + "epoch": 3.2367691698561436, + "grad_norm": 0.0025732112117111683, + "learning_rate": 6.667780261694239e-07, + "loss": 0.0548, + "num_input_tokens_seen": 12074432, + "step": 24525 + }, + { + "epoch": 3.237429061633892, + "grad_norm": 0.00010753106471383944, + "learning_rate": 6.663436930541502e-07, + "loss": 0.0, + "num_input_tokens_seen": 12076672, + "step": 24530 + }, + { + "epoch": 3.2380889534116406, + "grad_norm": 0.012851386331021786, + "learning_rate": 6.659094307550112e-07, + "loss": 0.0, + "num_input_tokens_seen": 12079168, + "step": 24535 + }, + { + "epoch": 3.238748845189389, + "grad_norm": 0.0013802021276205778, + "learning_rate": 6.654752393641763e-07, + "loss": 0.0, + "num_input_tokens_seen": 12081728, + "step": 24540 + }, + { + "epoch": 3.239408736967137, + "grad_norm": 0.006160231772810221, + "learning_rate": 6.650411189737993e-07, + "loss": 0.0, + "num_input_tokens_seen": 12084160, + "step": 24545 + }, + { + "epoch": 3.240068628744886, + "grad_norm": 0.0014017027569934726, + "learning_rate": 6.646070696760192e-07, + "loss": 0.028, + "num_input_tokens_seen": 12086656, + "step": 24550 + }, + { + "epoch": 3.240728520522634, + "grad_norm": 0.0003748885355889797, + "learning_rate": 6.6417309156296e-07, + "loss": 0.0, + "num_input_tokens_seen": 12089024, + "step": 24555 + }, + { + "epoch": 3.241388412300383, + "grad_norm": 0.001053887652233243, + "learning_rate": 6.637391847267302e-07, + "loss": 0.0, + "num_input_tokens_seen": 12091456, + "step": 24560 + }, + { + "epoch": 3.242048304078131, + "grad_norm": 0.014502989128232002, + "learning_rate": 6.633053492594232e-07, + "loss": 0.0, + "num_input_tokens_seen": 12094016, + "step": 24565 + }, + { + "epoch": 3.2427081958558794, + "grad_norm": 0.7106849551200867, + "learning_rate": 6.628715852531179e-07, + "loss": 0.0008, + "num_input_tokens_seen": 12096448, + "step": 24570 + }, + { + "epoch": 3.243368087633628, + "grad_norm": 0.005429383832961321, + "learning_rate": 6.624378927998773e-07, + "loss": 0.0, + "num_input_tokens_seen": 12099008, + "step": 24575 + }, + { + "epoch": 3.2440279794113764, + "grad_norm": 11.243054389953613, + "learning_rate": 6.620042719917495e-07, + "loss": 0.0898, + "num_input_tokens_seen": 12101248, + "step": 24580 + }, + { + "epoch": 3.244687871189125, + "grad_norm": 0.0023134404327720404, + "learning_rate": 6.615707229207674e-07, + "loss": 0.0, + "num_input_tokens_seen": 12103744, + "step": 24585 + }, + { + "epoch": 3.2453477629668734, + "grad_norm": 0.016285087913274765, + "learning_rate": 6.611372456789486e-07, + "loss": 0.0, + "num_input_tokens_seen": 12106496, + "step": 24590 + }, + { + "epoch": 3.2460076547446217, + "grad_norm": 0.003460107371211052, + "learning_rate": 6.607038403582956e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12108928, + "step": 24595 + }, + { + "epoch": 3.2466675465223704, + "grad_norm": 0.010175548493862152, + "learning_rate": 6.602705070507954e-07, + "loss": 0.0, + "num_input_tokens_seen": 12111488, + "step": 24600 + }, + { + "epoch": 3.2473274383001187, + "grad_norm": 0.013270308263599873, + "learning_rate": 6.598372458484202e-07, + "loss": 0.0, + "num_input_tokens_seen": 12114112, + "step": 24605 + }, + { + "epoch": 3.2479873300778674, + "grad_norm": 13.476341247558594, + "learning_rate": 6.594040568431262e-07, + "loss": 0.0266, + "num_input_tokens_seen": 12116352, + "step": 24610 + }, + { + "epoch": 3.2486472218556157, + "grad_norm": 0.004440042190253735, + "learning_rate": 6.589709401268546e-07, + "loss": 0.0909, + "num_input_tokens_seen": 12118976, + "step": 24615 + }, + { + "epoch": 3.249307113633364, + "grad_norm": 0.0023096001241356134, + "learning_rate": 6.585378957915315e-07, + "loss": 0.0822, + "num_input_tokens_seen": 12121216, + "step": 24620 + }, + { + "epoch": 3.2499670054111127, + "grad_norm": 0.004000347573310137, + "learning_rate": 6.581049239290672e-07, + "loss": 0.0, + "num_input_tokens_seen": 12123712, + "step": 24625 + }, + { + "epoch": 3.250626897188861, + "grad_norm": 0.0027472316287457943, + "learning_rate": 6.576720246313572e-07, + "loss": 0.0, + "num_input_tokens_seen": 12126016, + "step": 24630 + }, + { + "epoch": 3.2512867889666097, + "grad_norm": 62.97257995605469, + "learning_rate": 6.57239197990281e-07, + "loss": 0.0352, + "num_input_tokens_seen": 12128448, + "step": 24635 + }, + { + "epoch": 3.2512867889666097, + "eval_loss": 0.18092882633209229, + "eval_runtime": 7.838, + "eval_samples_per_second": 859.279, + "eval_steps_per_second": 107.426, + "num_input_tokens_seen": 12128448, + "step": 24635 + }, + { + "epoch": 3.251946680744358, + "grad_norm": 0.020524688065052032, + "learning_rate": 6.568064440977028e-07, + "loss": 0.0434, + "num_input_tokens_seen": 12130880, + "step": 24640 + }, + { + "epoch": 3.2526065725221063, + "grad_norm": 0.004393074195832014, + "learning_rate": 6.563737630454719e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12133248, + "step": 24645 + }, + { + "epoch": 3.253266464299855, + "grad_norm": 0.0023800248745828867, + "learning_rate": 6.559411549254211e-07, + "loss": 0.0, + "num_input_tokens_seen": 12135488, + "step": 24650 + }, + { + "epoch": 3.2539263560776033, + "grad_norm": 0.5170552730560303, + "learning_rate": 6.55508619829369e-07, + "loss": 0.0661, + "num_input_tokens_seen": 12137920, + "step": 24655 + }, + { + "epoch": 3.2545862478553516, + "grad_norm": 0.006337509024888277, + "learning_rate": 6.550761578491175e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12140416, + "step": 24660 + }, + { + "epoch": 3.2552461396331003, + "grad_norm": 0.18528732657432556, + "learning_rate": 6.546437690764539e-07, + "loss": 0.0338, + "num_input_tokens_seen": 12143040, + "step": 24665 + }, + { + "epoch": 3.2559060314108486, + "grad_norm": 0.002924225991591811, + "learning_rate": 6.542114536031498e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12145280, + "step": 24670 + }, + { + "epoch": 3.256565923188597, + "grad_norm": 0.019116446375846863, + "learning_rate": 6.537792115209599e-07, + "loss": 0.0611, + "num_input_tokens_seen": 12147776, + "step": 24675 + }, + { + "epoch": 3.2572258149663456, + "grad_norm": 0.001296358066610992, + "learning_rate": 6.533470429216258e-07, + "loss": 0.0, + "num_input_tokens_seen": 12150272, + "step": 24680 + }, + { + "epoch": 3.257885706744094, + "grad_norm": 0.006655433680862188, + "learning_rate": 6.529149478968709e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12152768, + "step": 24685 + }, + { + "epoch": 3.2585455985218426, + "grad_norm": 0.014014780521392822, + "learning_rate": 6.524829265384058e-07, + "loss": 0.0018, + "num_input_tokens_seen": 12155072, + "step": 24690 + }, + { + "epoch": 3.259205490299591, + "grad_norm": 0.6132182478904724, + "learning_rate": 6.520509789379227e-07, + "loss": 0.0355, + "num_input_tokens_seen": 12157376, + "step": 24695 + }, + { + "epoch": 3.259865382077339, + "grad_norm": 0.013002334162592888, + "learning_rate": 6.516191051870992e-07, + "loss": 0.0019, + "num_input_tokens_seen": 12159616, + "step": 24700 + }, + { + "epoch": 3.260525273855088, + "grad_norm": 0.0011766665847972035, + "learning_rate": 6.511873053775985e-07, + "loss": 0.0296, + "num_input_tokens_seen": 12161920, + "step": 24705 + }, + { + "epoch": 3.261185165632836, + "grad_norm": 0.0003221951483283192, + "learning_rate": 6.507555796010658e-07, + "loss": 0.0564, + "num_input_tokens_seen": 12164160, + "step": 24710 + }, + { + "epoch": 3.261845057410585, + "grad_norm": 0.012531314045190811, + "learning_rate": 6.503239279491328e-07, + "loss": 0.0615, + "num_input_tokens_seen": 12166464, + "step": 24715 + }, + { + "epoch": 3.262504949188333, + "grad_norm": 0.44118887186050415, + "learning_rate": 6.498923505134138e-07, + "loss": 0.0311, + "num_input_tokens_seen": 12168960, + "step": 24720 + }, + { + "epoch": 3.2631648409660814, + "grad_norm": 0.024108566343784332, + "learning_rate": 6.494608473855079e-07, + "loss": 0.0202, + "num_input_tokens_seen": 12171648, + "step": 24725 + }, + { + "epoch": 3.26382473274383, + "grad_norm": 0.0002736593596637249, + "learning_rate": 6.490294186569989e-07, + "loss": 0.0, + "num_input_tokens_seen": 12174400, + "step": 24730 + }, + { + "epoch": 3.2644846245215784, + "grad_norm": 188.8216094970703, + "learning_rate": 6.485980644194541e-07, + "loss": 0.0045, + "num_input_tokens_seen": 12176704, + "step": 24735 + }, + { + "epoch": 3.265144516299327, + "grad_norm": 18.44172477722168, + "learning_rate": 6.481667847644256e-07, + "loss": 0.0608, + "num_input_tokens_seen": 12179008, + "step": 24740 + }, + { + "epoch": 3.2658044080770754, + "grad_norm": 0.003776587313041091, + "learning_rate": 6.477355797834494e-07, + "loss": 0.0, + "num_input_tokens_seen": 12181632, + "step": 24745 + }, + { + "epoch": 3.2664642998548237, + "grad_norm": 0.0007478050538338721, + "learning_rate": 6.473044495680451e-07, + "loss": 0.0, + "num_input_tokens_seen": 12184000, + "step": 24750 + }, + { + "epoch": 3.2671241916325724, + "grad_norm": 0.015562565997242928, + "learning_rate": 6.468733942097178e-07, + "loss": 0.0241, + "num_input_tokens_seen": 12186368, + "step": 24755 + }, + { + "epoch": 3.2677840834103207, + "grad_norm": 0.0008789593121036887, + "learning_rate": 6.464424137999551e-07, + "loss": 0.0, + "num_input_tokens_seen": 12188672, + "step": 24760 + }, + { + "epoch": 3.2684439751880694, + "grad_norm": 0.06179773807525635, + "learning_rate": 6.4601150843023e-07, + "loss": 0.0017, + "num_input_tokens_seen": 12190784, + "step": 24765 + }, + { + "epoch": 3.2691038669658177, + "grad_norm": 0.003880223724991083, + "learning_rate": 6.455806781919988e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12193216, + "step": 24770 + }, + { + "epoch": 3.269763758743566, + "grad_norm": 0.0023615926038473845, + "learning_rate": 6.451499231767021e-07, + "loss": 0.0, + "num_input_tokens_seen": 12195712, + "step": 24775 + }, + { + "epoch": 3.2704236505213147, + "grad_norm": 2.087416410446167, + "learning_rate": 6.447192434757647e-07, + "loss": 0.0023, + "num_input_tokens_seen": 12198016, + "step": 24780 + }, + { + "epoch": 3.271083542299063, + "grad_norm": 0.00017697991279419512, + "learning_rate": 6.442886391805948e-07, + "loss": 0.0, + "num_input_tokens_seen": 12200512, + "step": 24785 + }, + { + "epoch": 3.2717434340768112, + "grad_norm": 0.00011539664410520345, + "learning_rate": 6.438581103825858e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12203200, + "step": 24790 + }, + { + "epoch": 3.27240332585456, + "grad_norm": 0.0006121984915807843, + "learning_rate": 6.434276571731139e-07, + "loss": 0.0, + "num_input_tokens_seen": 12205888, + "step": 24795 + }, + { + "epoch": 3.2730632176323082, + "grad_norm": 0.0016998574137687683, + "learning_rate": 6.429972796435392e-07, + "loss": 0.0006, + "num_input_tokens_seen": 12208448, + "step": 24800 + }, + { + "epoch": 3.2737231094100565, + "grad_norm": 0.0017284862697124481, + "learning_rate": 6.425669778852072e-07, + "loss": 0.0005, + "num_input_tokens_seen": 12210816, + "step": 24805 + }, + { + "epoch": 3.2743830011878052, + "grad_norm": 0.0031804365571588278, + "learning_rate": 6.421367519894454e-07, + "loss": 0.0, + "num_input_tokens_seen": 12213376, + "step": 24810 + }, + { + "epoch": 3.2750428929655535, + "grad_norm": 0.01726776920258999, + "learning_rate": 6.417066020475669e-07, + "loss": 0.0019, + "num_input_tokens_seen": 12216128, + "step": 24815 + }, + { + "epoch": 3.2757027847433022, + "grad_norm": 0.00042145411134697497, + "learning_rate": 6.412765281508677e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12218432, + "step": 24820 + }, + { + "epoch": 3.2763626765210505, + "grad_norm": 0.00022174572222866118, + "learning_rate": 6.408465303906271e-07, + "loss": 0.0, + "num_input_tokens_seen": 12221312, + "step": 24825 + }, + { + "epoch": 3.277022568298799, + "grad_norm": 0.009224276058375835, + "learning_rate": 6.404166088581102e-07, + "loss": 0.0, + "num_input_tokens_seen": 12223680, + "step": 24830 + }, + { + "epoch": 3.2776824600765475, + "grad_norm": 0.00777504313737154, + "learning_rate": 6.399867636445637e-07, + "loss": 0.0487, + "num_input_tokens_seen": 12226368, + "step": 24835 + }, + { + "epoch": 3.278342351854296, + "grad_norm": 3.733914491022006e-05, + "learning_rate": 6.395569948412198e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12228736, + "step": 24840 + }, + { + "epoch": 3.2790022436320445, + "grad_norm": 1.3131993000570219e-05, + "learning_rate": 6.39127302539294e-07, + "loss": 0.0559, + "num_input_tokens_seen": 12231360, + "step": 24845 + }, + { + "epoch": 3.279662135409793, + "grad_norm": 69.62069702148438, + "learning_rate": 6.386976868299844e-07, + "loss": 0.2746, + "num_input_tokens_seen": 12234176, + "step": 24850 + }, + { + "epoch": 3.280322027187541, + "grad_norm": 0.00021059955179225653, + "learning_rate": 6.382681478044749e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12236544, + "step": 24855 + }, + { + "epoch": 3.28098191896529, + "grad_norm": 0.0011798146879300475, + "learning_rate": 6.378386855539311e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12239040, + "step": 24860 + }, + { + "epoch": 3.281641810743038, + "grad_norm": 0.05896616727113724, + "learning_rate": 6.374093001695042e-07, + "loss": 0.08, + "num_input_tokens_seen": 12241408, + "step": 24865 + }, + { + "epoch": 3.282301702520787, + "grad_norm": 0.00966053456068039, + "learning_rate": 6.369799917423277e-07, + "loss": 0.0337, + "num_input_tokens_seen": 12243840, + "step": 24870 + }, + { + "epoch": 3.282961594298535, + "grad_norm": 0.006455364637076855, + "learning_rate": 6.365507603635188e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12246016, + "step": 24875 + }, + { + "epoch": 3.2836214860762833, + "grad_norm": 0.0004605850263033062, + "learning_rate": 6.361216061241792e-07, + "loss": 0.0006, + "num_input_tokens_seen": 12248320, + "step": 24880 + }, + { + "epoch": 3.284281377854032, + "grad_norm": 1.3851643800735474, + "learning_rate": 6.356925291153936e-07, + "loss": 0.0292, + "num_input_tokens_seen": 12251072, + "step": 24885 + }, + { + "epoch": 3.2849412696317803, + "grad_norm": 0.0008965849410742521, + "learning_rate": 6.352635294282309e-07, + "loss": 0.0011, + "num_input_tokens_seen": 12253632, + "step": 24890 + }, + { + "epoch": 3.285601161409529, + "grad_norm": 0.0017181680304929614, + "learning_rate": 6.348346071537427e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12256128, + "step": 24895 + }, + { + "epoch": 3.2862610531872773, + "grad_norm": 0.0009574389550834894, + "learning_rate": 6.344057623829648e-07, + "loss": 0.0, + "num_input_tokens_seen": 12258688, + "step": 24900 + }, + { + "epoch": 3.2869209449650256, + "grad_norm": 0.00013130882871337235, + "learning_rate": 6.339769952069165e-07, + "loss": 0.0, + "num_input_tokens_seen": 12261312, + "step": 24905 + }, + { + "epoch": 3.2875808367427743, + "grad_norm": 0.003466361900791526, + "learning_rate": 6.335483057166002e-07, + "loss": 0.0, + "num_input_tokens_seen": 12263616, + "step": 24910 + }, + { + "epoch": 3.2882407285205226, + "grad_norm": 0.0018649350386112928, + "learning_rate": 6.331196940030026e-07, + "loss": 0.0, + "num_input_tokens_seen": 12266304, + "step": 24915 + }, + { + "epoch": 3.288900620298271, + "grad_norm": 0.0006871359655633569, + "learning_rate": 6.326911601570933e-07, + "loss": 0.0, + "num_input_tokens_seen": 12268608, + "step": 24920 + }, + { + "epoch": 3.2895605120760196, + "grad_norm": 9.006281470647082e-05, + "learning_rate": 6.322627042698251e-07, + "loss": 0.0, + "num_input_tokens_seen": 12271296, + "step": 24925 + }, + { + "epoch": 3.290220403853768, + "grad_norm": 0.019566912204027176, + "learning_rate": 6.318343264321352e-07, + "loss": 0.0, + "num_input_tokens_seen": 12273664, + "step": 24930 + }, + { + "epoch": 3.290880295631516, + "grad_norm": 0.01741475611925125, + "learning_rate": 6.314060267349432e-07, + "loss": 0.0, + "num_input_tokens_seen": 12276224, + "step": 24935 + }, + { + "epoch": 3.291540187409265, + "grad_norm": 0.0002055577642749995, + "learning_rate": 6.309778052691532e-07, + "loss": 0.0551, + "num_input_tokens_seen": 12278656, + "step": 24940 + }, + { + "epoch": 3.292200079187013, + "grad_norm": 0.00037039650487713516, + "learning_rate": 6.305496621256516e-07, + "loss": 0.0, + "num_input_tokens_seen": 12280960, + "step": 24945 + }, + { + "epoch": 3.292859970964762, + "grad_norm": 0.0003575266164261848, + "learning_rate": 6.30121597395309e-07, + "loss": 0.0, + "num_input_tokens_seen": 12283776, + "step": 24950 + }, + { + "epoch": 3.29351986274251, + "grad_norm": 0.00045907212188467383, + "learning_rate": 6.296936111689789e-07, + "loss": 0.0, + "num_input_tokens_seen": 12285952, + "step": 24955 + }, + { + "epoch": 3.2941797545202585, + "grad_norm": 0.01350698247551918, + "learning_rate": 6.292657035374981e-07, + "loss": 0.0, + "num_input_tokens_seen": 12288576, + "step": 24960 + }, + { + "epoch": 3.294839646298007, + "grad_norm": 0.0007901710923761129, + "learning_rate": 6.288378745916873e-07, + "loss": 0.028, + "num_input_tokens_seen": 12291200, + "step": 24965 + }, + { + "epoch": 3.2954995380757555, + "grad_norm": 0.000196945431525819, + "learning_rate": 6.284101244223497e-07, + "loss": 0.0, + "num_input_tokens_seen": 12293568, + "step": 24970 + }, + { + "epoch": 3.296159429853504, + "grad_norm": 0.000786519784014672, + "learning_rate": 6.279824531202725e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12296064, + "step": 24975 + }, + { + "epoch": 3.2968193216312525, + "grad_norm": 0.0034475887659937143, + "learning_rate": 6.275548607762255e-07, + "loss": 0.0266, + "num_input_tokens_seen": 12298688, + "step": 24980 + }, + { + "epoch": 3.2974792134090007, + "grad_norm": 0.004421170800924301, + "learning_rate": 6.271273474809624e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12300992, + "step": 24985 + }, + { + "epoch": 3.2981391051867495, + "grad_norm": 0.031272463500499725, + "learning_rate": 6.266999133252196e-07, + "loss": 0.0, + "num_input_tokens_seen": 12303680, + "step": 24990 + }, + { + "epoch": 3.2987989969644977, + "grad_norm": 6.787193706259131e-05, + "learning_rate": 6.262725583997169e-07, + "loss": 0.0, + "num_input_tokens_seen": 12305728, + "step": 24995 + }, + { + "epoch": 3.2994588887422465, + "grad_norm": 4.479756535147317e-05, + "learning_rate": 6.258452827951576e-07, + "loss": 0.0, + "num_input_tokens_seen": 12308096, + "step": 25000 + }, + { + "epoch": 3.3001187805199947, + "grad_norm": 0.001354401232674718, + "learning_rate": 6.254180866022278e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12310656, + "step": 25005 + }, + { + "epoch": 3.300778672297743, + "grad_norm": 8.70009753271006e-05, + "learning_rate": 6.249909699115958e-07, + "loss": 0.0009, + "num_input_tokens_seen": 12313600, + "step": 25010 + }, + { + "epoch": 3.3014385640754917, + "grad_norm": 0.003904482815414667, + "learning_rate": 6.245639328139156e-07, + "loss": 0.0266, + "num_input_tokens_seen": 12315840, + "step": 25015 + }, + { + "epoch": 3.30209845585324, + "grad_norm": 0.0015482102753594518, + "learning_rate": 6.241369753998213e-07, + "loss": 0.0, + "num_input_tokens_seen": 12318784, + "step": 25020 + }, + { + "epoch": 3.3027583476309887, + "grad_norm": 0.00014941871631890535, + "learning_rate": 6.23710097759933e-07, + "loss": 0.0, + "num_input_tokens_seen": 12321152, + "step": 25025 + }, + { + "epoch": 3.303418239408737, + "grad_norm": 0.0046899099834263325, + "learning_rate": 6.232832999848511e-07, + "loss": 0.0045, + "num_input_tokens_seen": 12323712, + "step": 25030 + }, + { + "epoch": 3.3040781311864853, + "grad_norm": 0.011636439710855484, + "learning_rate": 6.228565821651606e-07, + "loss": 0.0, + "num_input_tokens_seen": 12326272, + "step": 25035 + }, + { + "epoch": 3.304738022964234, + "grad_norm": 0.00013077407493256032, + "learning_rate": 6.224299443914301e-07, + "loss": 0.0, + "num_input_tokens_seen": 12328896, + "step": 25040 + }, + { + "epoch": 3.3053979147419823, + "grad_norm": 0.000527672003954649, + "learning_rate": 6.22003386754209e-07, + "loss": 0.0, + "num_input_tokens_seen": 12331328, + "step": 25045 + }, + { + "epoch": 3.3060578065197306, + "grad_norm": 36.416690826416016, + "learning_rate": 6.215769093440325e-07, + "loss": 0.0323, + "num_input_tokens_seen": 12333568, + "step": 25050 + }, + { + "epoch": 3.3067176982974793, + "grad_norm": 0.0005653111729770899, + "learning_rate": 6.211505122514165e-07, + "loss": 0.0003, + "num_input_tokens_seen": 12336064, + "step": 25055 + }, + { + "epoch": 3.3073775900752276, + "grad_norm": 0.001593749038875103, + "learning_rate": 6.207241955668605e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12338752, + "step": 25060 + }, + { + "epoch": 3.3080374818529763, + "grad_norm": 0.007038143463432789, + "learning_rate": 6.202979593808478e-07, + "loss": 0.0, + "num_input_tokens_seen": 12341184, + "step": 25065 + }, + { + "epoch": 3.3086973736307246, + "grad_norm": 0.015235554426908493, + "learning_rate": 6.198718037838435e-07, + "loss": 0.0533, + "num_input_tokens_seen": 12343488, + "step": 25070 + }, + { + "epoch": 3.309357265408473, + "grad_norm": 0.0020212512463331223, + "learning_rate": 6.194457288662963e-07, + "loss": 0.0465, + "num_input_tokens_seen": 12346176, + "step": 25075 + }, + { + "epoch": 3.3100171571862216, + "grad_norm": 0.0019470115657895803, + "learning_rate": 6.190197347186374e-07, + "loss": 0.0, + "num_input_tokens_seen": 12348480, + "step": 25080 + }, + { + "epoch": 3.31067704896397, + "grad_norm": 0.03345693275332451, + "learning_rate": 6.185938214312808e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12350848, + "step": 25085 + }, + { + "epoch": 3.311336940741718, + "grad_norm": 0.0005512295756489038, + "learning_rate": 6.181679890946238e-07, + "loss": 0.0, + "num_input_tokens_seen": 12353472, + "step": 25090 + }, + { + "epoch": 3.311996832519467, + "grad_norm": 0.0008134461240842938, + "learning_rate": 6.17742237799046e-07, + "loss": 0.0, + "num_input_tokens_seen": 12356224, + "step": 25095 + }, + { + "epoch": 3.312656724297215, + "grad_norm": 0.0038204581942409277, + "learning_rate": 6.173165676349102e-07, + "loss": 0.0049, + "num_input_tokens_seen": 12358784, + "step": 25100 + }, + { + "epoch": 3.313316616074964, + "grad_norm": 0.00022062953212298453, + "learning_rate": 6.168909786925619e-07, + "loss": 0.0, + "num_input_tokens_seen": 12361088, + "step": 25105 + }, + { + "epoch": 3.313976507852712, + "grad_norm": 0.00046019876026548445, + "learning_rate": 6.164654710623289e-07, + "loss": 0.0, + "num_input_tokens_seen": 12363456, + "step": 25110 + }, + { + "epoch": 3.3146363996304604, + "grad_norm": 0.3447696566581726, + "learning_rate": 6.160400448345224e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12366016, + "step": 25115 + }, + { + "epoch": 3.315296291408209, + "grad_norm": 0.008802136406302452, + "learning_rate": 6.156147000994358e-07, + "loss": 0.0005, + "num_input_tokens_seen": 12368576, + "step": 25120 + }, + { + "epoch": 3.3159561831859574, + "grad_norm": 0.46979010105133057, + "learning_rate": 6.151894369473459e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12371008, + "step": 25125 + }, + { + "epoch": 3.316616074963706, + "grad_norm": 0.0001193025746033527, + "learning_rate": 6.147642554685112e-07, + "loss": 0.0, + "num_input_tokens_seen": 12373376, + "step": 25130 + }, + { + "epoch": 3.3172759667414544, + "grad_norm": 0.006421966478228569, + "learning_rate": 6.143391557531738e-07, + "loss": 0.0, + "num_input_tokens_seen": 12376064, + "step": 25135 + }, + { + "epoch": 3.3179358585192027, + "grad_norm": 0.0010537905618548393, + "learning_rate": 6.139141378915578e-07, + "loss": 0.0061, + "num_input_tokens_seen": 12378560, + "step": 25140 + }, + { + "epoch": 3.3185957502969514, + "grad_norm": 0.01963932067155838, + "learning_rate": 6.1348920197387e-07, + "loss": 0.0, + "num_input_tokens_seen": 12380928, + "step": 25145 + }, + { + "epoch": 3.3192556420746997, + "grad_norm": 0.292169988155365, + "learning_rate": 6.130643480903005e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12383360, + "step": 25150 + }, + { + "epoch": 3.3199155338524484, + "grad_norm": 0.0011927977902814746, + "learning_rate": 6.126395763310213e-07, + "loss": 0.0082, + "num_input_tokens_seen": 12385920, + "step": 25155 + }, + { + "epoch": 3.3205754256301967, + "grad_norm": 0.004244993906468153, + "learning_rate": 6.122148867861864e-07, + "loss": 0.0308, + "num_input_tokens_seen": 12388416, + "step": 25160 + }, + { + "epoch": 3.321235317407945, + "grad_norm": 11.786145210266113, + "learning_rate": 6.117902795459342e-07, + "loss": 0.02, + "num_input_tokens_seen": 12390976, + "step": 25165 + }, + { + "epoch": 3.3218952091856937, + "grad_norm": 0.06375247985124588, + "learning_rate": 6.113657547003834e-07, + "loss": 0.0, + "num_input_tokens_seen": 12393472, + "step": 25170 + }, + { + "epoch": 3.322555100963442, + "grad_norm": 0.0009331585606560111, + "learning_rate": 6.109413123396374e-07, + "loss": 0.0, + "num_input_tokens_seen": 12396224, + "step": 25175 + }, + { + "epoch": 3.3232149927411903, + "grad_norm": 0.00104428268969059, + "learning_rate": 6.105169525537805e-07, + "loss": 0.0266, + "num_input_tokens_seen": 12398656, + "step": 25180 + }, + { + "epoch": 3.323874884518939, + "grad_norm": 5.317957766237669e-05, + "learning_rate": 6.100926754328797e-07, + "loss": 0.0, + "num_input_tokens_seen": 12400960, + "step": 25185 + }, + { + "epoch": 3.3245347762966873, + "grad_norm": 0.010817881673574448, + "learning_rate": 6.096684810669855e-07, + "loss": 0.0, + "num_input_tokens_seen": 12403584, + "step": 25190 + }, + { + "epoch": 3.325194668074436, + "grad_norm": 0.00012285925913602114, + "learning_rate": 6.092443695461289e-07, + "loss": 0.0352, + "num_input_tokens_seen": 12406144, + "step": 25195 + }, + { + "epoch": 3.3258545598521843, + "grad_norm": 0.0024948231875896454, + "learning_rate": 6.08820340960326e-07, + "loss": 0.0, + "num_input_tokens_seen": 12408512, + "step": 25200 + }, + { + "epoch": 3.3265144516299325, + "grad_norm": 7.421601912938058e-05, + "learning_rate": 6.083963953995728e-07, + "loss": 0.0, + "num_input_tokens_seen": 12411136, + "step": 25205 + }, + { + "epoch": 3.3271743434076813, + "grad_norm": 1.3887579441070557, + "learning_rate": 6.079725329538486e-07, + "loss": 0.0005, + "num_input_tokens_seen": 12413312, + "step": 25210 + }, + { + "epoch": 3.3278342351854295, + "grad_norm": 15.47343921661377, + "learning_rate": 6.075487537131158e-07, + "loss": 0.028, + "num_input_tokens_seen": 12415744, + "step": 25215 + }, + { + "epoch": 3.328494126963178, + "grad_norm": 0.0005510664777830243, + "learning_rate": 6.071250577673179e-07, + "loss": 0.0006, + "num_input_tokens_seen": 12418112, + "step": 25220 + }, + { + "epoch": 3.3291540187409265, + "grad_norm": 0.0006473141256719828, + "learning_rate": 6.067014452063816e-07, + "loss": 0.0, + "num_input_tokens_seen": 12420672, + "step": 25225 + }, + { + "epoch": 3.329813910518675, + "grad_norm": 0.00029872983577661216, + "learning_rate": 6.062779161202156e-07, + "loss": 0.0, + "num_input_tokens_seen": 12422848, + "step": 25230 + }, + { + "epoch": 3.3304738022964235, + "grad_norm": 0.0003775710938498378, + "learning_rate": 6.058544705987105e-07, + "loss": 0.0, + "num_input_tokens_seen": 12425280, + "step": 25235 + }, + { + "epoch": 3.331133694074172, + "grad_norm": 0.019592612981796265, + "learning_rate": 6.0543110873174e-07, + "loss": 0.0366, + "num_input_tokens_seen": 12427712, + "step": 25240 + }, + { + "epoch": 3.33179358585192, + "grad_norm": 0.0002695015864446759, + "learning_rate": 6.050078306091595e-07, + "loss": 0.0, + "num_input_tokens_seen": 12430528, + "step": 25245 + }, + { + "epoch": 3.332453477629669, + "grad_norm": 0.0001931964507093653, + "learning_rate": 6.045846363208066e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12432768, + "step": 25250 + }, + { + "epoch": 3.333113369407417, + "grad_norm": 0.0038819850888103247, + "learning_rate": 6.041615259565014e-07, + "loss": 0.0, + "num_input_tokens_seen": 12435392, + "step": 25255 + }, + { + "epoch": 3.333773261185166, + "grad_norm": 0.000421057891799137, + "learning_rate": 6.037384996060455e-07, + "loss": 0.0, + "num_input_tokens_seen": 12437568, + "step": 25260 + }, + { + "epoch": 3.334433152962914, + "grad_norm": 10.183027267456055, + "learning_rate": 6.033155573592239e-07, + "loss": 0.0266, + "num_input_tokens_seen": 12439744, + "step": 25265 + }, + { + "epoch": 3.3350930447406624, + "grad_norm": 0.00021547931828536093, + "learning_rate": 6.028926993058026e-07, + "loss": 0.0252, + "num_input_tokens_seen": 12442048, + "step": 25270 + }, + { + "epoch": 3.335752936518411, + "grad_norm": 0.00020331527048256248, + "learning_rate": 6.024699255355302e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12444992, + "step": 25275 + }, + { + "epoch": 3.3364128282961594, + "grad_norm": 0.34224241971969604, + "learning_rate": 6.020472361381374e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12447296, + "step": 25280 + }, + { + "epoch": 3.337072720073908, + "grad_norm": 0.00011249903764110059, + "learning_rate": 6.016246312033371e-07, + "loss": 0.0, + "num_input_tokens_seen": 12449920, + "step": 25285 + }, + { + "epoch": 3.3377326118516564, + "grad_norm": 8.940664883994032e-06, + "learning_rate": 6.01202110820824e-07, + "loss": 0.0294, + "num_input_tokens_seen": 12452416, + "step": 25290 + }, + { + "epoch": 3.3383925036294047, + "grad_norm": 0.012581578455865383, + "learning_rate": 6.007796750802748e-07, + "loss": 0.0736, + "num_input_tokens_seen": 12454784, + "step": 25295 + }, + { + "epoch": 3.3390523954071534, + "grad_norm": 0.0010578975779935718, + "learning_rate": 6.003573240713489e-07, + "loss": 0.0, + "num_input_tokens_seen": 12457536, + "step": 25300 + }, + { + "epoch": 3.3397122871849017, + "grad_norm": 0.17746460437774658, + "learning_rate": 5.999350578836868e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12460032, + "step": 25305 + }, + { + "epoch": 3.3403721789626504, + "grad_norm": 7.115570042515174e-05, + "learning_rate": 5.995128766069118e-07, + "loss": 0.0, + "num_input_tokens_seen": 12462336, + "step": 25310 + }, + { + "epoch": 3.3410320707403987, + "grad_norm": 0.006361126434057951, + "learning_rate": 5.990907803306286e-07, + "loss": 0.0813, + "num_input_tokens_seen": 12464960, + "step": 25315 + }, + { + "epoch": 3.341691962518147, + "grad_norm": 0.0019897071179002523, + "learning_rate": 5.986687691444239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12467648, + "step": 25320 + }, + { + "epoch": 3.3423518542958957, + "grad_norm": 5.641576717607677e-05, + "learning_rate": 5.98246843137867e-07, + "loss": 0.0, + "num_input_tokens_seen": 12470144, + "step": 25325 + }, + { + "epoch": 3.343011746073644, + "grad_norm": 0.00012555062130559236, + "learning_rate": 5.978250024005082e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12472512, + "step": 25330 + }, + { + "epoch": 3.343671637851392, + "grad_norm": 0.0028107953257858753, + "learning_rate": 5.974032470218804e-07, + "loss": 0.0, + "num_input_tokens_seen": 12474880, + "step": 25335 + }, + { + "epoch": 3.344331529629141, + "grad_norm": 0.00012251742009539157, + "learning_rate": 5.969815770914983e-07, + "loss": 0.0502, + "num_input_tokens_seen": 12477632, + "step": 25340 + }, + { + "epoch": 3.344991421406889, + "grad_norm": 0.029161326587200165, + "learning_rate": 5.965599926988575e-07, + "loss": 0.0, + "num_input_tokens_seen": 12480704, + "step": 25345 + }, + { + "epoch": 3.3456513131846375, + "grad_norm": 8.900999091565609e-06, + "learning_rate": 5.961384939334373e-07, + "loss": 0.0003, + "num_input_tokens_seen": 12483200, + "step": 25350 + }, + { + "epoch": 3.346311204962386, + "grad_norm": 0.010479514487087727, + "learning_rate": 5.957170808846968e-07, + "loss": 0.0011, + "num_input_tokens_seen": 12485504, + "step": 25355 + }, + { + "epoch": 3.3469710967401345, + "grad_norm": 0.014084945432841778, + "learning_rate": 5.952957536420786e-07, + "loss": 0.0, + "num_input_tokens_seen": 12487872, + "step": 25360 + }, + { + "epoch": 3.347630988517883, + "grad_norm": 0.002309830393642187, + "learning_rate": 5.948745122950061e-07, + "loss": 0.0, + "num_input_tokens_seen": 12489984, + "step": 25365 + }, + { + "epoch": 3.3482908802956315, + "grad_norm": 0.050204720348119736, + "learning_rate": 5.944533569328841e-07, + "loss": 0.0, + "num_input_tokens_seen": 12492544, + "step": 25370 + }, + { + "epoch": 3.3489507720733798, + "grad_norm": 0.0001361641043331474, + "learning_rate": 5.940322876451009e-07, + "loss": 0.0267, + "num_input_tokens_seen": 12494592, + "step": 25375 + }, + { + "epoch": 3.3496106638511285, + "grad_norm": 1.233882903761696e-05, + "learning_rate": 5.936113045210245e-07, + "loss": 0.0, + "num_input_tokens_seen": 12496896, + "step": 25380 + }, + { + "epoch": 3.3502705556288768, + "grad_norm": 53.146240234375, + "learning_rate": 5.931904076500062e-07, + "loss": 0.1251, + "num_input_tokens_seen": 12499648, + "step": 25385 + }, + { + "epoch": 3.3509304474066255, + "grad_norm": 3.949514575651847e-05, + "learning_rate": 5.927695971213781e-07, + "loss": 0.002, + "num_input_tokens_seen": 12502272, + "step": 25390 + }, + { + "epoch": 3.3515903391843738, + "grad_norm": 3.287645085947588e-05, + "learning_rate": 5.923488730244537e-07, + "loss": 0.0, + "num_input_tokens_seen": 12504960, + "step": 25395 + }, + { + "epoch": 3.352250230962122, + "grad_norm": 0.45993924140930176, + "learning_rate": 5.919282354485293e-07, + "loss": 0.0413, + "num_input_tokens_seen": 12507456, + "step": 25400 + }, + { + "epoch": 3.3529101227398708, + "grad_norm": 1.4746529814146925e-05, + "learning_rate": 5.915076844828817e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12509824, + "step": 25405 + }, + { + "epoch": 3.353570014517619, + "grad_norm": 0.00013451321865431964, + "learning_rate": 5.910872202167701e-07, + "loss": 0.0, + "num_input_tokens_seen": 12512576, + "step": 25410 + }, + { + "epoch": 3.3542299062953678, + "grad_norm": 0.0022569603752344847, + "learning_rate": 5.90666842739435e-07, + "loss": 0.0, + "num_input_tokens_seen": 12515072, + "step": 25415 + }, + { + "epoch": 3.354889798073116, + "grad_norm": 0.030115384608507156, + "learning_rate": 5.902465521400982e-07, + "loss": 0.0025, + "num_input_tokens_seen": 12517632, + "step": 25420 + }, + { + "epoch": 3.3555496898508643, + "grad_norm": 0.0004955868935212493, + "learning_rate": 5.898263485079636e-07, + "loss": 0.0032, + "num_input_tokens_seen": 12519872, + "step": 25425 + }, + { + "epoch": 3.356209581628613, + "grad_norm": 0.03164421021938324, + "learning_rate": 5.89406231932216e-07, + "loss": 0.0, + "num_input_tokens_seen": 12522304, + "step": 25430 + }, + { + "epoch": 3.3568694734063613, + "grad_norm": 2.380097794230096e-05, + "learning_rate": 5.889862025020227e-07, + "loss": 0.0922, + "num_input_tokens_seen": 12524480, + "step": 25435 + }, + { + "epoch": 3.35752936518411, + "grad_norm": 0.113060362637043, + "learning_rate": 5.885662603065316e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12527040, + "step": 25440 + }, + { + "epoch": 3.3581892569618583, + "grad_norm": 0.00010437117452966049, + "learning_rate": 5.881464054348721e-07, + "loss": 0.0, + "num_input_tokens_seen": 12529216, + "step": 25445 + }, + { + "epoch": 3.3588491487396066, + "grad_norm": 0.002310203155502677, + "learning_rate": 5.877266379761561e-07, + "loss": 0.0, + "num_input_tokens_seen": 12531584, + "step": 25450 + }, + { + "epoch": 3.3595090405173553, + "grad_norm": 0.010437843389809132, + "learning_rate": 5.873069580194753e-07, + "loss": 0.0, + "num_input_tokens_seen": 12534144, + "step": 25455 + }, + { + "epoch": 3.3601689322951036, + "grad_norm": 1.0495466085558292e-05, + "learning_rate": 5.868873656539044e-07, + "loss": 0.0337, + "num_input_tokens_seen": 12536320, + "step": 25460 + }, + { + "epoch": 3.360828824072852, + "grad_norm": 0.010058234445750713, + "learning_rate": 5.864678609684986e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12538624, + "step": 25465 + }, + { + "epoch": 3.3614887158506006, + "grad_norm": 5.52868950762786e-05, + "learning_rate": 5.860484440522946e-07, + "loss": 0.0007, + "num_input_tokens_seen": 12541120, + "step": 25470 + }, + { + "epoch": 3.362148607628349, + "grad_norm": 8.224558769143187e-06, + "learning_rate": 5.856291149943109e-07, + "loss": 0.1057, + "num_input_tokens_seen": 12543424, + "step": 25475 + }, + { + "epoch": 3.362808499406097, + "grad_norm": 20.38540267944336, + "learning_rate": 5.852098738835467e-07, + "loss": 0.0891, + "num_input_tokens_seen": 12545984, + "step": 25480 + }, + { + "epoch": 3.363468391183846, + "grad_norm": 740.0801391601562, + "learning_rate": 5.847907208089834e-07, + "loss": 0.0404, + "num_input_tokens_seen": 12548608, + "step": 25485 + }, + { + "epoch": 3.364128282961594, + "grad_norm": 0.0001841172925196588, + "learning_rate": 5.843716558595831e-07, + "loss": 0.0, + "num_input_tokens_seen": 12551104, + "step": 25490 + }, + { + "epoch": 3.364788174739343, + "grad_norm": 13.210966110229492, + "learning_rate": 5.839526791242883e-07, + "loss": 0.0276, + "num_input_tokens_seen": 12553600, + "step": 25495 + }, + { + "epoch": 3.365448066517091, + "grad_norm": 0.026582859456539154, + "learning_rate": 5.835337906920253e-07, + "loss": 0.0009, + "num_input_tokens_seen": 12556032, + "step": 25500 + }, + { + "epoch": 3.3661079582948394, + "grad_norm": 0.0003508688823785633, + "learning_rate": 5.831149906516989e-07, + "loss": 0.0394, + "num_input_tokens_seen": 12558464, + "step": 25505 + }, + { + "epoch": 3.366767850072588, + "grad_norm": 0.006343053188174963, + "learning_rate": 5.826962790921974e-07, + "loss": 0.0, + "num_input_tokens_seen": 12560704, + "step": 25510 + }, + { + "epoch": 3.3674277418503364, + "grad_norm": 9.642515215091407e-05, + "learning_rate": 5.822776561023885e-07, + "loss": 0.0337, + "num_input_tokens_seen": 12563136, + "step": 25515 + }, + { + "epoch": 3.368087633628085, + "grad_norm": 0.12306500971317291, + "learning_rate": 5.81859121771122e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12565376, + "step": 25520 + }, + { + "epoch": 3.3687475254058334, + "grad_norm": 0.0005888367886655033, + "learning_rate": 5.814406761872294e-07, + "loss": 0.0, + "num_input_tokens_seen": 12567680, + "step": 25525 + }, + { + "epoch": 3.3694074171835817, + "grad_norm": 2.025174617767334, + "learning_rate": 5.810223194395221e-07, + "loss": 0.0018, + "num_input_tokens_seen": 12570176, + "step": 25530 + }, + { + "epoch": 3.3700673089613304, + "grad_norm": 0.0001719648134894669, + "learning_rate": 5.806040516167933e-07, + "loss": 0.0631, + "num_input_tokens_seen": 12572672, + "step": 25535 + }, + { + "epoch": 3.3707272007390787, + "grad_norm": 0.03616896644234657, + "learning_rate": 5.801858728078179e-07, + "loss": 0.0364, + "num_input_tokens_seen": 12575296, + "step": 25540 + }, + { + "epoch": 3.3713870925168274, + "grad_norm": 0.00024308938009198755, + "learning_rate": 5.797677831013506e-07, + "loss": 0.076, + "num_input_tokens_seen": 12577856, + "step": 25545 + }, + { + "epoch": 3.3720469842945757, + "grad_norm": 1.1081480979919434, + "learning_rate": 5.793497825861283e-07, + "loss": 0.0607, + "num_input_tokens_seen": 12580224, + "step": 25550 + }, + { + "epoch": 3.372706876072324, + "grad_norm": 0.004675113596022129, + "learning_rate": 5.789318713508686e-07, + "loss": 0.055, + "num_input_tokens_seen": 12582464, + "step": 25555 + }, + { + "epoch": 3.3733667678500727, + "grad_norm": 0.1336895078420639, + "learning_rate": 5.785140494842704e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12584768, + "step": 25560 + }, + { + "epoch": 3.374026659627821, + "grad_norm": 0.20448976755142212, + "learning_rate": 5.780963170750129e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12587264, + "step": 25565 + }, + { + "epoch": 3.3746865514055697, + "grad_norm": 0.13979199528694153, + "learning_rate": 5.776786742117564e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12589760, + "step": 25570 + }, + { + "epoch": 3.375346443183318, + "grad_norm": 25.667259216308594, + "learning_rate": 5.772611209831436e-07, + "loss": 0.0678, + "num_input_tokens_seen": 12592320, + "step": 25575 + }, + { + "epoch": 3.3760063349610663, + "grad_norm": 0.2753572165966034, + "learning_rate": 5.768436574777964e-07, + "loss": 0.0014, + "num_input_tokens_seen": 12594944, + "step": 25580 + }, + { + "epoch": 3.376666226738815, + "grad_norm": 0.08741661161184311, + "learning_rate": 5.764262837843186e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12597312, + "step": 25585 + }, + { + "epoch": 3.3773261185165633, + "grad_norm": 1.0904316902160645, + "learning_rate": 5.760089999912947e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12599744, + "step": 25590 + }, + { + "epoch": 3.3779860102943116, + "grad_norm": 43.92424392700195, + "learning_rate": 5.755918061872907e-07, + "loss": 0.0473, + "num_input_tokens_seen": 12602496, + "step": 25595 + }, + { + "epoch": 3.3786459020720603, + "grad_norm": 0.0960235595703125, + "learning_rate": 5.751747024608527e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12604736, + "step": 25600 + }, + { + "epoch": 3.3793057938498086, + "grad_norm": 8.825310214888304e-05, + "learning_rate": 5.747576889005068e-07, + "loss": 0.0474, + "num_input_tokens_seen": 12607168, + "step": 25605 + }, + { + "epoch": 3.379965685627557, + "grad_norm": 0.486053466796875, + "learning_rate": 5.743407655947627e-07, + "loss": 0.0322, + "num_input_tokens_seen": 12609920, + "step": 25610 + }, + { + "epoch": 3.3806255774053056, + "grad_norm": 26.345102310180664, + "learning_rate": 5.739239326321086e-07, + "loss": 0.0329, + "num_input_tokens_seen": 12612160, + "step": 25615 + }, + { + "epoch": 3.381285469183054, + "grad_norm": 0.013855023309588432, + "learning_rate": 5.735071901010146e-07, + "loss": 0.0, + "num_input_tokens_seen": 12614720, + "step": 25620 + }, + { + "epoch": 3.3819453609608026, + "grad_norm": 0.0022012635599821806, + "learning_rate": 5.730905380899309e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12617472, + "step": 25625 + }, + { + "epoch": 3.382605252738551, + "grad_norm": 0.006016657687723637, + "learning_rate": 5.72673976687289e-07, + "loss": 0.0, + "num_input_tokens_seen": 12619968, + "step": 25630 + }, + { + "epoch": 3.383265144516299, + "grad_norm": 0.0054673487320542336, + "learning_rate": 5.722575059815014e-07, + "loss": 0.0022, + "num_input_tokens_seen": 12622272, + "step": 25635 + }, + { + "epoch": 3.383925036294048, + "grad_norm": 0.5703974366188049, + "learning_rate": 5.718411260609599e-07, + "loss": 0.0005, + "num_input_tokens_seen": 12624576, + "step": 25640 + }, + { + "epoch": 3.384584928071796, + "grad_norm": 2.5996956825256348, + "learning_rate": 5.714248370140397e-07, + "loss": 0.0251, + "num_input_tokens_seen": 12627264, + "step": 25645 + }, + { + "epoch": 3.385244819849545, + "grad_norm": 0.038143787533044815, + "learning_rate": 5.710086389290945e-07, + "loss": 0.004, + "num_input_tokens_seen": 12629952, + "step": 25650 + }, + { + "epoch": 3.385904711627293, + "grad_norm": 0.0001890839048428461, + "learning_rate": 5.705925318944585e-07, + "loss": 0.0, + "num_input_tokens_seen": 12632768, + "step": 25655 + }, + { + "epoch": 3.3865646034050414, + "grad_norm": 0.0002839722437784076, + "learning_rate": 5.701765159984483e-07, + "loss": 0.0, + "num_input_tokens_seen": 12635008, + "step": 25660 + }, + { + "epoch": 3.38722449518279, + "grad_norm": 0.00851992517709732, + "learning_rate": 5.6976059132936e-07, + "loss": 0.0, + "num_input_tokens_seen": 12637568, + "step": 25665 + }, + { + "epoch": 3.3878843869605384, + "grad_norm": 0.0002972839865833521, + "learning_rate": 5.69344757975471e-07, + "loss": 0.0675, + "num_input_tokens_seen": 12640192, + "step": 25670 + }, + { + "epoch": 3.388544278738287, + "grad_norm": 336.3095703125, + "learning_rate": 5.689290160250382e-07, + "loss": 0.1155, + "num_input_tokens_seen": 12642688, + "step": 25675 + }, + { + "epoch": 3.3892041705160354, + "grad_norm": 4.639300346374512, + "learning_rate": 5.685133655663001e-07, + "loss": 0.0025, + "num_input_tokens_seen": 12645376, + "step": 25680 + }, + { + "epoch": 3.3898640622937837, + "grad_norm": 0.0036630574613809586, + "learning_rate": 5.68097806687476e-07, + "loss": 0.1427, + "num_input_tokens_seen": 12648000, + "step": 25685 + }, + { + "epoch": 3.3905239540715324, + "grad_norm": 0.00019933377916458994, + "learning_rate": 5.676823394767644e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12650816, + "step": 25690 + }, + { + "epoch": 3.3911838458492807, + "grad_norm": 0.002877293387427926, + "learning_rate": 5.672669640223458e-07, + "loss": 0.0044, + "num_input_tokens_seen": 12653376, + "step": 25695 + }, + { + "epoch": 3.3918437376270294, + "grad_norm": 1.301897646044381e-05, + "learning_rate": 5.668516804123808e-07, + "loss": 0.0, + "num_input_tokens_seen": 12656000, + "step": 25700 + }, + { + "epoch": 3.3925036294047777, + "grad_norm": 0.9059159159660339, + "learning_rate": 5.664364887350097e-07, + "loss": 0.0018, + "num_input_tokens_seen": 12658496, + "step": 25705 + }, + { + "epoch": 3.393163521182526, + "grad_norm": 0.0015077658463269472, + "learning_rate": 5.660213890783541e-07, + "loss": 0.0, + "num_input_tokens_seen": 12660864, + "step": 25710 + }, + { + "epoch": 3.3938234129602747, + "grad_norm": 0.00017341156490147114, + "learning_rate": 5.656063815305161e-07, + "loss": 0.0, + "num_input_tokens_seen": 12663424, + "step": 25715 + }, + { + "epoch": 3.394483304738023, + "grad_norm": 0.00016228918684646487, + "learning_rate": 5.651914661795785e-07, + "loss": 0.0, + "num_input_tokens_seen": 12665856, + "step": 25720 + }, + { + "epoch": 3.3951431965157712, + "grad_norm": 0.08427057415246964, + "learning_rate": 5.64776643113603e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12668352, + "step": 25725 + }, + { + "epoch": 3.39580308829352, + "grad_norm": 0.00015899655409157276, + "learning_rate": 5.643619124206333e-07, + "loss": 0.0, + "num_input_tokens_seen": 12670592, + "step": 25730 + }, + { + "epoch": 3.3964629800712682, + "grad_norm": 1.2886687727586832e-05, + "learning_rate": 5.639472741886937e-07, + "loss": 0.0518, + "num_input_tokens_seen": 12673024, + "step": 25735 + }, + { + "epoch": 3.3971228718490165, + "grad_norm": 0.14700756967067719, + "learning_rate": 5.635327285057869e-07, + "loss": 0.0003, + "num_input_tokens_seen": 12675456, + "step": 25740 + }, + { + "epoch": 3.3977827636267652, + "grad_norm": 0.002936385106295347, + "learning_rate": 5.63118275459898e-07, + "loss": 0.0, + "num_input_tokens_seen": 12678080, + "step": 25745 + }, + { + "epoch": 3.3984426554045135, + "grad_norm": 0.00019914221775252372, + "learning_rate": 5.627039151389917e-07, + "loss": 0.0595, + "num_input_tokens_seen": 12680320, + "step": 25750 + }, + { + "epoch": 3.3991025471822622, + "grad_norm": 1.2330317076703068e-05, + "learning_rate": 5.622896476310125e-07, + "loss": 0.0, + "num_input_tokens_seen": 12682624, + "step": 25755 + }, + { + "epoch": 3.3997624389600105, + "grad_norm": 0.000984366051852703, + "learning_rate": 5.618754730238863e-07, + "loss": 0.0, + "num_input_tokens_seen": 12685120, + "step": 25760 + }, + { + "epoch": 3.400422330737759, + "grad_norm": 1.5101558346941601e-05, + "learning_rate": 5.614613914055175e-07, + "loss": 0.2906, + "num_input_tokens_seen": 12687808, + "step": 25765 + }, + { + "epoch": 3.4010822225155075, + "grad_norm": 0.0074266353622078896, + "learning_rate": 5.610474028637935e-07, + "loss": 0.0, + "num_input_tokens_seen": 12690240, + "step": 25770 + }, + { + "epoch": 3.401742114293256, + "grad_norm": 3.550009205355309e-05, + "learning_rate": 5.606335074865795e-07, + "loss": 0.0, + "num_input_tokens_seen": 12692544, + "step": 25775 + }, + { + "epoch": 3.4024020060710045, + "grad_norm": 0.0015909220091998577, + "learning_rate": 5.602197053617214e-07, + "loss": 0.0, + "num_input_tokens_seen": 12695104, + "step": 25780 + }, + { + "epoch": 3.403061897848753, + "grad_norm": 0.0009785228176042438, + "learning_rate": 5.598059965770468e-07, + "loss": 0.0, + "num_input_tokens_seen": 12697536, + "step": 25785 + }, + { + "epoch": 3.403721789626501, + "grad_norm": 0.0001278437121072784, + "learning_rate": 5.593923812203613e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12699840, + "step": 25790 + }, + { + "epoch": 3.40438168140425, + "grad_norm": 0.0002581992303021252, + "learning_rate": 5.589788593794529e-07, + "loss": 0.0, + "num_input_tokens_seen": 12702336, + "step": 25795 + }, + { + "epoch": 3.405041573181998, + "grad_norm": 0.00014046661090105772, + "learning_rate": 5.585654311420873e-07, + "loss": 0.0, + "num_input_tokens_seen": 12704640, + "step": 25800 + }, + { + "epoch": 3.405701464959747, + "grad_norm": 0.049336936324834824, + "learning_rate": 5.581520965960125e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12707264, + "step": 25805 + }, + { + "epoch": 3.406361356737495, + "grad_norm": 4.411644840729423e-05, + "learning_rate": 5.57738855828956e-07, + "loss": 0.028, + "num_input_tokens_seen": 12709888, + "step": 25810 + }, + { + "epoch": 3.4070212485152434, + "grad_norm": 0.0019217153312638402, + "learning_rate": 5.573257089286243e-07, + "loss": 0.0, + "num_input_tokens_seen": 12712192, + "step": 25815 + }, + { + "epoch": 3.407681140292992, + "grad_norm": 16.16039276123047, + "learning_rate": 5.569126559827053e-07, + "loss": 0.0454, + "num_input_tokens_seen": 12714560, + "step": 25820 + }, + { + "epoch": 3.4083410320707404, + "grad_norm": 0.0001751862291712314, + "learning_rate": 5.564996970788667e-07, + "loss": 0.0, + "num_input_tokens_seen": 12717056, + "step": 25825 + }, + { + "epoch": 3.409000923848489, + "grad_norm": 0.0007003069040365517, + "learning_rate": 5.560868323047556e-07, + "loss": 0.0059, + "num_input_tokens_seen": 12719552, + "step": 25830 + }, + { + "epoch": 3.4096608156262374, + "grad_norm": 1.921590774145443e-05, + "learning_rate": 5.556740617479998e-07, + "loss": 0.0, + "num_input_tokens_seen": 12721920, + "step": 25835 + }, + { + "epoch": 3.4103207074039856, + "grad_norm": 7.84834410296753e-05, + "learning_rate": 5.552613854962067e-07, + "loss": 0.0352, + "num_input_tokens_seen": 12724608, + "step": 25840 + }, + { + "epoch": 3.4109805991817344, + "grad_norm": 19.140037536621094, + "learning_rate": 5.548488036369645e-07, + "loss": 0.0472, + "num_input_tokens_seen": 12727168, + "step": 25845 + }, + { + "epoch": 3.4116404909594826, + "grad_norm": 0.00015301394159905612, + "learning_rate": 5.544363162578399e-07, + "loss": 0.0, + "num_input_tokens_seen": 12729408, + "step": 25850 + }, + { + "epoch": 3.412300382737231, + "grad_norm": 0.0004984589177183807, + "learning_rate": 5.540239234463804e-07, + "loss": 0.0, + "num_input_tokens_seen": 12732160, + "step": 25855 + }, + { + "epoch": 3.4129602745149796, + "grad_norm": 0.0006072504911571741, + "learning_rate": 5.536116252901142e-07, + "loss": 0.0, + "num_input_tokens_seen": 12734656, + "step": 25860 + }, + { + "epoch": 3.413620166292728, + "grad_norm": 0.004299015738070011, + "learning_rate": 5.531994218765477e-07, + "loss": 0.0213, + "num_input_tokens_seen": 12737216, + "step": 25865 + }, + { + "epoch": 3.4142800580704766, + "grad_norm": 0.01571951061487198, + "learning_rate": 5.527873132931682e-07, + "loss": 0.0, + "num_input_tokens_seen": 12739968, + "step": 25870 + }, + { + "epoch": 3.414939949848225, + "grad_norm": 12.407855987548828, + "learning_rate": 5.523752996274435e-07, + "loss": 0.0366, + "num_input_tokens_seen": 12742208, + "step": 25875 + }, + { + "epoch": 3.415599841625973, + "grad_norm": 0.0006227616686373949, + "learning_rate": 5.519633809668197e-07, + "loss": 0.0003, + "num_input_tokens_seen": 12744640, + "step": 25880 + }, + { + "epoch": 3.416259733403722, + "grad_norm": 1.4746785163879395, + "learning_rate": 5.515515573987238e-07, + "loss": 0.0007, + "num_input_tokens_seen": 12747008, + "step": 25885 + }, + { + "epoch": 3.41691962518147, + "grad_norm": 0.004045584239065647, + "learning_rate": 5.511398290105625e-07, + "loss": 0.1469, + "num_input_tokens_seen": 12749568, + "step": 25890 + }, + { + "epoch": 3.4175795169592185, + "grad_norm": 0.7868015170097351, + "learning_rate": 5.507281958897224e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12752128, + "step": 25895 + }, + { + "epoch": 3.418239408736967, + "grad_norm": 0.31684139370918274, + "learning_rate": 5.503166581235694e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12754560, + "step": 25900 + }, + { + "epoch": 3.4188993005147155, + "grad_norm": 0.01085609383881092, + "learning_rate": 5.499052157994486e-07, + "loss": 0.0938, + "num_input_tokens_seen": 12757248, + "step": 25905 + }, + { + "epoch": 3.419559192292464, + "grad_norm": 0.06237511336803436, + "learning_rate": 5.49493869004687e-07, + "loss": 0.0352, + "num_input_tokens_seen": 12759872, + "step": 25910 + }, + { + "epoch": 3.4202190840702125, + "grad_norm": 0.016344398260116577, + "learning_rate": 5.490826178265893e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12762240, + "step": 25915 + }, + { + "epoch": 3.4208789758479607, + "grad_norm": 0.3351937532424927, + "learning_rate": 5.486714623524405e-07, + "loss": 0.0026, + "num_input_tokens_seen": 12764544, + "step": 25920 + }, + { + "epoch": 3.4215388676257095, + "grad_norm": 0.01966346614062786, + "learning_rate": 5.482604026695057e-07, + "loss": 0.0, + "num_input_tokens_seen": 12767296, + "step": 25925 + }, + { + "epoch": 3.4221987594034577, + "grad_norm": 45.29475402832031, + "learning_rate": 5.478494388650295e-07, + "loss": 0.1487, + "num_input_tokens_seen": 12769920, + "step": 25930 + }, + { + "epoch": 3.4228586511812065, + "grad_norm": 19.655010223388672, + "learning_rate": 5.474385710262357e-07, + "loss": 0.0041, + "num_input_tokens_seen": 12772416, + "step": 25935 + }, + { + "epoch": 3.4235185429589547, + "grad_norm": 0.4863883852958679, + "learning_rate": 5.470277992403271e-07, + "loss": 0.0268, + "num_input_tokens_seen": 12774720, + "step": 25940 + }, + { + "epoch": 3.424178434736703, + "grad_norm": 0.00197918270714581, + "learning_rate": 5.466171235944889e-07, + "loss": 0.0657, + "num_input_tokens_seen": 12777152, + "step": 25945 + }, + { + "epoch": 3.4248383265144517, + "grad_norm": 0.003174730809405446, + "learning_rate": 5.462065441758826e-07, + "loss": 0.0, + "num_input_tokens_seen": 12779776, + "step": 25950 + }, + { + "epoch": 3.4254982182922, + "grad_norm": 0.00010079597268486395, + "learning_rate": 5.457960610716515e-07, + "loss": 0.0984, + "num_input_tokens_seen": 12782272, + "step": 25955 + }, + { + "epoch": 3.4261581100699487, + "grad_norm": 0.05801895633339882, + "learning_rate": 5.453856743689172e-07, + "loss": 0.0387, + "num_input_tokens_seen": 12785088, + "step": 25960 + }, + { + "epoch": 3.426818001847697, + "grad_norm": 0.014650064520537853, + "learning_rate": 5.449753841547811e-07, + "loss": 0.0009, + "num_input_tokens_seen": 12787392, + "step": 25965 + }, + { + "epoch": 3.4274778936254453, + "grad_norm": 1.5185226202011108, + "learning_rate": 5.445651905163253e-07, + "loss": 0.0017, + "num_input_tokens_seen": 12789952, + "step": 25970 + }, + { + "epoch": 3.428137785403194, + "grad_norm": 0.00041124902782030404, + "learning_rate": 5.441550935406091e-07, + "loss": 0.02, + "num_input_tokens_seen": 12792640, + "step": 25975 + }, + { + "epoch": 3.4287976771809423, + "grad_norm": 0.22763076424598694, + "learning_rate": 5.43745093314674e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12795136, + "step": 25980 + }, + { + "epoch": 3.4294575689586906, + "grad_norm": 0.0003356730449013412, + "learning_rate": 5.433351899255389e-07, + "loss": 0.0, + "num_input_tokens_seen": 12797824, + "step": 25985 + }, + { + "epoch": 3.4301174607364393, + "grad_norm": 0.04222612828016281, + "learning_rate": 5.429253834602025e-07, + "loss": 0.0013, + "num_input_tokens_seen": 12800640, + "step": 25990 + }, + { + "epoch": 3.4307773525141876, + "grad_norm": 0.05344577878713608, + "learning_rate": 5.425156740056436e-07, + "loss": 0.0, + "num_input_tokens_seen": 12803200, + "step": 25995 + }, + { + "epoch": 3.4314372442919363, + "grad_norm": 0.001554910559207201, + "learning_rate": 5.4210606164882e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12805696, + "step": 26000 + }, + { + "epoch": 3.4320971360696846, + "grad_norm": 33.12980651855469, + "learning_rate": 5.416965464766694e-07, + "loss": 0.1064, + "num_input_tokens_seen": 12808640, + "step": 26005 + }, + { + "epoch": 3.432757027847433, + "grad_norm": 0.00654615368694067, + "learning_rate": 5.412871285761076e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12811328, + "step": 26010 + }, + { + "epoch": 3.4334169196251816, + "grad_norm": 8.831528248265386e-05, + "learning_rate": 5.408778080340311e-07, + "loss": 0.0701, + "num_input_tokens_seen": 12814144, + "step": 26015 + }, + { + "epoch": 3.43407681140293, + "grad_norm": 0.002978324657306075, + "learning_rate": 5.404685849373154e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12816512, + "step": 26020 + }, + { + "epoch": 3.434736703180678, + "grad_norm": 0.00023777736350893974, + "learning_rate": 5.400594593728146e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12818752, + "step": 26025 + }, + { + "epoch": 3.435396594958427, + "grad_norm": 0.0007493056473322213, + "learning_rate": 5.396504314273629e-07, + "loss": 0.0, + "num_input_tokens_seen": 12821312, + "step": 26030 + }, + { + "epoch": 3.436056486736175, + "grad_norm": 0.018175829201936722, + "learning_rate": 5.39241501187774e-07, + "loss": 0.0, + "num_input_tokens_seen": 12823872, + "step": 26035 + }, + { + "epoch": 3.436716378513924, + "grad_norm": 0.0037966063246130943, + "learning_rate": 5.388326687408395e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12826560, + "step": 26040 + }, + { + "epoch": 3.437376270291672, + "grad_norm": 0.0003152771678287536, + "learning_rate": 5.384239341733314e-07, + "loss": 0.0, + "num_input_tokens_seen": 12829248, + "step": 26045 + }, + { + "epoch": 3.4380361620694204, + "grad_norm": 0.001747465692460537, + "learning_rate": 5.38015297572001e-07, + "loss": 0.0, + "num_input_tokens_seen": 12831616, + "step": 26050 + }, + { + "epoch": 3.438696053847169, + "grad_norm": 0.0026879829820245504, + "learning_rate": 5.376067590235786e-07, + "loss": 0.0, + "num_input_tokens_seen": 12834240, + "step": 26055 + }, + { + "epoch": 3.4393559456249174, + "grad_norm": 0.0006490828818641603, + "learning_rate": 5.371983186147729e-07, + "loss": 0.0, + "num_input_tokens_seen": 12836928, + "step": 26060 + }, + { + "epoch": 3.440015837402666, + "grad_norm": 0.0013973484747111797, + "learning_rate": 5.367899764322725e-07, + "loss": 0.0016, + "num_input_tokens_seen": 12839808, + "step": 26065 + }, + { + "epoch": 3.4406757291804144, + "grad_norm": 0.20865289866924286, + "learning_rate": 5.363817325627458e-07, + "loss": 0.0673, + "num_input_tokens_seen": 12842176, + "step": 26070 + }, + { + "epoch": 3.4413356209581627, + "grad_norm": 0.08312004804611206, + "learning_rate": 5.359735870928388e-07, + "loss": 0.0, + "num_input_tokens_seen": 12844672, + "step": 26075 + }, + { + "epoch": 3.4419955127359114, + "grad_norm": 0.0006424880702979863, + "learning_rate": 5.355655401091776e-07, + "loss": 0.0938, + "num_input_tokens_seen": 12846976, + "step": 26080 + }, + { + "epoch": 3.4426554045136597, + "grad_norm": 0.04234812408685684, + "learning_rate": 5.351575916983677e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12849600, + "step": 26085 + }, + { + "epoch": 3.4433152962914084, + "grad_norm": 0.0009398151305504143, + "learning_rate": 5.347497419469926e-07, + "loss": 0.0457, + "num_input_tokens_seen": 12851776, + "step": 26090 + }, + { + "epoch": 3.4439751880691567, + "grad_norm": 0.034936707466840744, + "learning_rate": 5.34341990941616e-07, + "loss": 0.0, + "num_input_tokens_seen": 12854144, + "step": 26095 + }, + { + "epoch": 3.444635079846905, + "grad_norm": 0.0014790110290050507, + "learning_rate": 5.33934338768779e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12856640, + "step": 26100 + }, + { + "epoch": 3.4452949716246537, + "grad_norm": 0.0009260703809559345, + "learning_rate": 5.335267855150045e-07, + "loss": 0.0, + "num_input_tokens_seen": 12859072, + "step": 26105 + }, + { + "epoch": 3.445954863402402, + "grad_norm": 0.0021803437266498804, + "learning_rate": 5.331193312667916e-07, + "loss": 0.0, + "num_input_tokens_seen": 12861824, + "step": 26110 + }, + { + "epoch": 3.4466147551801507, + "grad_norm": 0.0007027118699625134, + "learning_rate": 5.327119761106193e-07, + "loss": 0.0298, + "num_input_tokens_seen": 12864064, + "step": 26115 + }, + { + "epoch": 3.447274646957899, + "grad_norm": 0.0019124702084809542, + "learning_rate": 5.323047201329468e-07, + "loss": 0.0036, + "num_input_tokens_seen": 12866688, + "step": 26120 + }, + { + "epoch": 3.4479345387356473, + "grad_norm": 0.001602896605618298, + "learning_rate": 5.318975634202103e-07, + "loss": 0.0441, + "num_input_tokens_seen": 12869056, + "step": 26125 + }, + { + "epoch": 3.448594430513396, + "grad_norm": 0.006494051311165094, + "learning_rate": 5.314905060588266e-07, + "loss": 0.0, + "num_input_tokens_seen": 12871744, + "step": 26130 + }, + { + "epoch": 3.4492543222911443, + "grad_norm": 0.0012551875552162528, + "learning_rate": 5.310835481351901e-07, + "loss": 0.0, + "num_input_tokens_seen": 12874176, + "step": 26135 + }, + { + "epoch": 3.4499142140688925, + "grad_norm": 0.0018485430628061295, + "learning_rate": 5.306766897356747e-07, + "loss": 0.0, + "num_input_tokens_seen": 12876672, + "step": 26140 + }, + { + "epoch": 3.4505741058466413, + "grad_norm": 0.04104364663362503, + "learning_rate": 5.302699309466338e-07, + "loss": 0.0013, + "num_input_tokens_seen": 12879232, + "step": 26145 + }, + { + "epoch": 3.4512339976243895, + "grad_norm": 0.1153852790594101, + "learning_rate": 5.298632718543981e-07, + "loss": 0.072, + "num_input_tokens_seen": 12881856, + "step": 26150 + }, + { + "epoch": 3.451893889402138, + "grad_norm": 0.00025399410515092313, + "learning_rate": 5.294567125452785e-07, + "loss": 0.0009, + "num_input_tokens_seen": 12884160, + "step": 26155 + }, + { + "epoch": 3.4525537811798865, + "grad_norm": 0.0006861420115455985, + "learning_rate": 5.290502531055648e-07, + "loss": 0.0, + "num_input_tokens_seen": 12886592, + "step": 26160 + }, + { + "epoch": 3.453213672957635, + "grad_norm": 0.002263479633256793, + "learning_rate": 5.286438936215239e-07, + "loss": 0.0308, + "num_input_tokens_seen": 12888832, + "step": 26165 + }, + { + "epoch": 3.4538735647353835, + "grad_norm": 0.007285143714398146, + "learning_rate": 5.282376341794033e-07, + "loss": 0.0875, + "num_input_tokens_seen": 12891200, + "step": 26170 + }, + { + "epoch": 3.454533456513132, + "grad_norm": 0.027898499742150307, + "learning_rate": 5.278314748654287e-07, + "loss": 0.0, + "num_input_tokens_seen": 12893952, + "step": 26175 + }, + { + "epoch": 3.45519334829088, + "grad_norm": 75.30522918701172, + "learning_rate": 5.274254157658048e-07, + "loss": 0.0025, + "num_input_tokens_seen": 12896512, + "step": 26180 + }, + { + "epoch": 3.455853240068629, + "grad_norm": 0.00036869599716737866, + "learning_rate": 5.270194569667139e-07, + "loss": 0.0, + "num_input_tokens_seen": 12898880, + "step": 26185 + }, + { + "epoch": 3.456513131846377, + "grad_norm": 2.335038423538208, + "learning_rate": 5.266135985543181e-07, + "loss": 0.0949, + "num_input_tokens_seen": 12901632, + "step": 26190 + }, + { + "epoch": 3.457173023624126, + "grad_norm": 0.0013648836174979806, + "learning_rate": 5.262078406147585e-07, + "loss": 0.0, + "num_input_tokens_seen": 12903808, + "step": 26195 + }, + { + "epoch": 3.457832915401874, + "grad_norm": 0.0006372526404447854, + "learning_rate": 5.258021832341534e-07, + "loss": 0.0, + "num_input_tokens_seen": 12906112, + "step": 26200 + }, + { + "epoch": 3.4584928071796224, + "grad_norm": 0.0013917350443080068, + "learning_rate": 5.25396626498601e-07, + "loss": 0.0007, + "num_input_tokens_seen": 12908416, + "step": 26205 + }, + { + "epoch": 3.459152698957371, + "grad_norm": 0.007651422638446093, + "learning_rate": 5.249911704941782e-07, + "loss": 0.0, + "num_input_tokens_seen": 12910848, + "step": 26210 + }, + { + "epoch": 3.4598125907351194, + "grad_norm": 0.003433382837101817, + "learning_rate": 5.245858153069394e-07, + "loss": 0.0, + "num_input_tokens_seen": 12913408, + "step": 26215 + }, + { + "epoch": 3.460472482512868, + "grad_norm": 0.0034286656882613897, + "learning_rate": 5.241805610229185e-07, + "loss": 0.0016, + "num_input_tokens_seen": 12916032, + "step": 26220 + }, + { + "epoch": 3.4611323742906164, + "grad_norm": 11.557177543640137, + "learning_rate": 5.23775407728128e-07, + "loss": 0.0252, + "num_input_tokens_seen": 12918528, + "step": 26225 + }, + { + "epoch": 3.4617922660683647, + "grad_norm": 0.00010427210509078577, + "learning_rate": 5.23370355508559e-07, + "loss": 0.0, + "num_input_tokens_seen": 12920960, + "step": 26230 + }, + { + "epoch": 3.4624521578461134, + "grad_norm": 0.006644572596997023, + "learning_rate": 5.229654044501802e-07, + "loss": 0.0, + "num_input_tokens_seen": 12923456, + "step": 26235 + }, + { + "epoch": 3.4631120496238617, + "grad_norm": 0.004640599247068167, + "learning_rate": 5.2256055463894e-07, + "loss": 0.0, + "num_input_tokens_seen": 12925760, + "step": 26240 + }, + { + "epoch": 3.4637719414016104, + "grad_norm": 0.01460598036646843, + "learning_rate": 5.221558061607649e-07, + "loss": 0.0, + "num_input_tokens_seen": 12928384, + "step": 26245 + }, + { + "epoch": 3.4644318331793587, + "grad_norm": 0.018127327784895897, + "learning_rate": 5.217511591015595e-07, + "loss": 0.0, + "num_input_tokens_seen": 12930816, + "step": 26250 + }, + { + "epoch": 3.465091724957107, + "grad_norm": 0.000998675706796348, + "learning_rate": 5.213466135472072e-07, + "loss": 0.0066, + "num_input_tokens_seen": 12933568, + "step": 26255 + }, + { + "epoch": 3.4657516167348557, + "grad_norm": 0.009710119105875492, + "learning_rate": 5.209421695835701e-07, + "loss": 0.0005, + "num_input_tokens_seen": 12935872, + "step": 26260 + }, + { + "epoch": 3.466411508512604, + "grad_norm": 0.001298540853895247, + "learning_rate": 5.205378272964889e-07, + "loss": 0.0, + "num_input_tokens_seen": 12938176, + "step": 26265 + }, + { + "epoch": 3.467071400290352, + "grad_norm": 0.08667539060115814, + "learning_rate": 5.201335867717818e-07, + "loss": 0.0396, + "num_input_tokens_seen": 12940416, + "step": 26270 + }, + { + "epoch": 3.467731292068101, + "grad_norm": 0.022302474826574326, + "learning_rate": 5.197294480952452e-07, + "loss": 0.0072, + "num_input_tokens_seen": 12943040, + "step": 26275 + }, + { + "epoch": 3.468391183845849, + "grad_norm": 0.008083767257630825, + "learning_rate": 5.193254113526561e-07, + "loss": 0.0001, + "num_input_tokens_seen": 12945408, + "step": 26280 + }, + { + "epoch": 3.4690510756235975, + "grad_norm": 0.0010398940648883581, + "learning_rate": 5.189214766297675e-07, + "loss": 0.0396, + "num_input_tokens_seen": 12947840, + "step": 26285 + }, + { + "epoch": 3.469710967401346, + "grad_norm": 0.000684830010868609, + "learning_rate": 5.18517644012312e-07, + "loss": 0.0, + "num_input_tokens_seen": 12950016, + "step": 26290 + }, + { + "epoch": 3.4703708591790945, + "grad_norm": 1.880915641784668, + "learning_rate": 5.181139135859996e-07, + "loss": 0.0017, + "num_input_tokens_seen": 12952832, + "step": 26295 + }, + { + "epoch": 3.471030750956843, + "grad_norm": 0.0005736892344430089, + "learning_rate": 5.177102854365196e-07, + "loss": 0.0648, + "num_input_tokens_seen": 12955200, + "step": 26300 + }, + { + "epoch": 3.4716906427345915, + "grad_norm": 0.0024660734925419092, + "learning_rate": 5.173067596495393e-07, + "loss": 0.0, + "num_input_tokens_seen": 12957568, + "step": 26305 + }, + { + "epoch": 3.4723505345123398, + "grad_norm": 0.0006041477899998426, + "learning_rate": 5.16903336310703e-07, + "loss": 0.0, + "num_input_tokens_seen": 12960000, + "step": 26310 + }, + { + "epoch": 3.4730104262900885, + "grad_norm": 0.0010401011677458882, + "learning_rate": 5.165000155056363e-07, + "loss": 0.0, + "num_input_tokens_seen": 12962368, + "step": 26315 + }, + { + "epoch": 3.4736703180678368, + "grad_norm": 0.00012037860869895667, + "learning_rate": 5.1609679731994e-07, + "loss": 0.0007, + "num_input_tokens_seen": 12964864, + "step": 26320 + }, + { + "epoch": 3.4743302098455855, + "grad_norm": 30.517963409423828, + "learning_rate": 5.156936818391937e-07, + "loss": 0.147, + "num_input_tokens_seen": 12967488, + "step": 26325 + }, + { + "epoch": 3.4749901016233338, + "grad_norm": 0.0019168228609487414, + "learning_rate": 5.152906691489566e-07, + "loss": 0.0, + "num_input_tokens_seen": 12970112, + "step": 26330 + }, + { + "epoch": 3.475649993401082, + "grad_norm": 0.007496009115129709, + "learning_rate": 5.148877593347649e-07, + "loss": 0.0147, + "num_input_tokens_seen": 12972736, + "step": 26335 + }, + { + "epoch": 3.4763098851788308, + "grad_norm": 0.03964198753237724, + "learning_rate": 5.144849524821337e-07, + "loss": 0.0, + "num_input_tokens_seen": 12975296, + "step": 26340 + }, + { + "epoch": 3.476969776956579, + "grad_norm": 0.002376055810600519, + "learning_rate": 5.140822486765552e-07, + "loss": 0.0002, + "num_input_tokens_seen": 12977920, + "step": 26345 + }, + { + "epoch": 3.4776296687343278, + "grad_norm": 0.0003713663318194449, + "learning_rate": 5.136796480035007e-07, + "loss": 0.0, + "num_input_tokens_seen": 12980480, + "step": 26350 + }, + { + "epoch": 3.478289560512076, + "grad_norm": 0.004427397157996893, + "learning_rate": 5.132771505484197e-07, + "loss": 0.0, + "num_input_tokens_seen": 12983168, + "step": 26355 + }, + { + "epoch": 3.4789494522898243, + "grad_norm": 0.0713246613740921, + "learning_rate": 5.128747563967384e-07, + "loss": 0.0004, + "num_input_tokens_seen": 12985600, + "step": 26360 + }, + { + "epoch": 3.479609344067573, + "grad_norm": 0.0026552770286798477, + "learning_rate": 5.124724656338626e-07, + "loss": 0.0, + "num_input_tokens_seen": 12988416, + "step": 26365 + }, + { + "epoch": 3.4802692358453213, + "grad_norm": 0.012636465951800346, + "learning_rate": 5.12070278345176e-07, + "loss": 0.0, + "num_input_tokens_seen": 12990656, + "step": 26370 + }, + { + "epoch": 3.48092912762307, + "grad_norm": 0.0008887459989637136, + "learning_rate": 5.116681946160391e-07, + "loss": 0.0, + "num_input_tokens_seen": 12993408, + "step": 26375 + }, + { + "epoch": 3.4815890194008183, + "grad_norm": 0.0003574863658286631, + "learning_rate": 5.112662145317917e-07, + "loss": 0.0323, + "num_input_tokens_seen": 12995520, + "step": 26380 + }, + { + "epoch": 3.4822489111785666, + "grad_norm": 0.0011668046936392784, + "learning_rate": 5.108643381777511e-07, + "loss": 0.1026, + "num_input_tokens_seen": 12997824, + "step": 26385 + }, + { + "epoch": 3.4829088029563153, + "grad_norm": 0.0015160164330154657, + "learning_rate": 5.104625656392132e-07, + "loss": 0.0431, + "num_input_tokens_seen": 13000128, + "step": 26390 + }, + { + "epoch": 3.4835686947340636, + "grad_norm": 0.001212544390000403, + "learning_rate": 5.100608970014501e-07, + "loss": 0.0, + "num_input_tokens_seen": 13002624, + "step": 26395 + }, + { + "epoch": 3.484228586511812, + "grad_norm": 0.0007896866300143301, + "learning_rate": 5.09659332349714e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13005120, + "step": 26400 + }, + { + "epoch": 3.4848884782895606, + "grad_norm": 0.0667625218629837, + "learning_rate": 5.092578717692341e-07, + "loss": 0.0, + "num_input_tokens_seen": 13007808, + "step": 26405 + }, + { + "epoch": 3.485548370067309, + "grad_norm": 0.0005229983362369239, + "learning_rate": 5.088565153452171e-07, + "loss": 0.0523, + "num_input_tokens_seen": 13010048, + "step": 26410 + }, + { + "epoch": 3.486208261845057, + "grad_norm": 0.0036990130320191383, + "learning_rate": 5.084552631628479e-07, + "loss": 0.0, + "num_input_tokens_seen": 13012608, + "step": 26415 + }, + { + "epoch": 3.486868153622806, + "grad_norm": 0.004398142918944359, + "learning_rate": 5.080541153072902e-07, + "loss": 0.0472, + "num_input_tokens_seen": 13015040, + "step": 26420 + }, + { + "epoch": 3.487528045400554, + "grad_norm": 0.0012921980815008283, + "learning_rate": 5.076530718636834e-07, + "loss": 0.0, + "num_input_tokens_seen": 13017216, + "step": 26425 + }, + { + "epoch": 3.488187937178303, + "grad_norm": 0.14894424378871918, + "learning_rate": 5.07252132917147e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13019648, + "step": 26430 + }, + { + "epoch": 3.488847828956051, + "grad_norm": 15.87000560760498, + "learning_rate": 5.068512985527773e-07, + "loss": 0.0309, + "num_input_tokens_seen": 13021824, + "step": 26435 + }, + { + "epoch": 3.4895077207337994, + "grad_norm": 14.084748268127441, + "learning_rate": 5.064505688556486e-07, + "loss": 0.0554, + "num_input_tokens_seen": 13024192, + "step": 26440 + }, + { + "epoch": 3.490167612511548, + "grad_norm": 0.20227785408496857, + "learning_rate": 5.060499439108127e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13026624, + "step": 26445 + }, + { + "epoch": 3.4908275042892964, + "grad_norm": 0.004290015436708927, + "learning_rate": 5.056494238032985e-07, + "loss": 0.0, + "num_input_tokens_seen": 13029184, + "step": 26450 + }, + { + "epoch": 3.491487396067045, + "grad_norm": 0.12511903047561646, + "learning_rate": 5.052490086181151e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13031616, + "step": 26455 + }, + { + "epoch": 3.4921472878447934, + "grad_norm": 0.0507790669798851, + "learning_rate": 5.048486984402467e-07, + "loss": 0.0, + "num_input_tokens_seen": 13034048, + "step": 26460 + }, + { + "epoch": 3.4928071796225417, + "grad_norm": 0.10282183438539505, + "learning_rate": 5.044484933546565e-07, + "loss": 0.0032, + "num_input_tokens_seen": 13036480, + "step": 26465 + }, + { + "epoch": 3.4934670714002904, + "grad_norm": 0.010798219591379166, + "learning_rate": 5.040483934462849e-07, + "loss": 0.0, + "num_input_tokens_seen": 13039104, + "step": 26470 + }, + { + "epoch": 3.4941269631780387, + "grad_norm": 2.097705602645874, + "learning_rate": 5.036483988000504e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13041728, + "step": 26475 + }, + { + "epoch": 3.4947868549557874, + "grad_norm": 0.008413027971982956, + "learning_rate": 5.032485095008494e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13044288, + "step": 26480 + }, + { + "epoch": 3.4954467467335357, + "grad_norm": 0.0005098399124108255, + "learning_rate": 5.028487256335541e-07, + "loss": 0.0, + "num_input_tokens_seen": 13046784, + "step": 26485 + }, + { + "epoch": 3.496106638511284, + "grad_norm": 0.0010063229128718376, + "learning_rate": 5.024490472830176e-07, + "loss": 0.0, + "num_input_tokens_seen": 13049536, + "step": 26490 + }, + { + "epoch": 3.4967665302890327, + "grad_norm": 0.000572339806240052, + "learning_rate": 5.020494745340677e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13051968, + "step": 26495 + }, + { + "epoch": 3.497426422066781, + "grad_norm": 23.79427146911621, + "learning_rate": 5.016500074715108e-07, + "loss": 0.0899, + "num_input_tokens_seen": 13054592, + "step": 26500 + }, + { + "epoch": 3.4980863138445297, + "grad_norm": 0.001726845744997263, + "learning_rate": 5.01250646180131e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13057088, + "step": 26505 + }, + { + "epoch": 3.498746205622278, + "grad_norm": 0.0033422312699258327, + "learning_rate": 5.008513907446898e-07, + "loss": 0.0, + "num_input_tokens_seen": 13059712, + "step": 26510 + }, + { + "epoch": 3.4994060974000263, + "grad_norm": 0.0009347685845568776, + "learning_rate": 5.004522412499267e-07, + "loss": 0.0688, + "num_input_tokens_seen": 13062336, + "step": 26515 + }, + { + "epoch": 3.500065989177775, + "grad_norm": 0.03671286627650261, + "learning_rate": 5.000531977805575e-07, + "loss": 0.0, + "num_input_tokens_seen": 13064640, + "step": 26520 + }, + { + "epoch": 3.5007258809555233, + "grad_norm": 0.0007085995166562498, + "learning_rate": 4.99654260421277e-07, + "loss": 0.0441, + "num_input_tokens_seen": 13067328, + "step": 26525 + }, + { + "epoch": 3.501385772733272, + "grad_norm": 0.004408668261021376, + "learning_rate": 4.992554292567568e-07, + "loss": 0.0, + "num_input_tokens_seen": 13069824, + "step": 26530 + }, + { + "epoch": 3.501385772733272, + "eval_loss": 0.18093986809253693, + "eval_runtime": 7.9194, + "eval_samples_per_second": 850.447, + "eval_steps_per_second": 106.322, + "num_input_tokens_seen": 13069824, + "step": 26530 + }, + { + "epoch": 3.5020456645110203, + "grad_norm": 0.05824043229222298, + "learning_rate": 4.988567043716452e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13072000, + "step": 26535 + }, + { + "epoch": 3.5027055562887686, + "grad_norm": 0.4921933114528656, + "learning_rate": 4.984580858505691e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13074304, + "step": 26540 + }, + { + "epoch": 3.503365448066517, + "grad_norm": 0.008208317682147026, + "learning_rate": 4.980595737781328e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13076928, + "step": 26545 + }, + { + "epoch": 3.5040253398442656, + "grad_norm": 0.1486758440732956, + "learning_rate": 4.976611682389168e-07, + "loss": 0.0012, + "num_input_tokens_seen": 13079360, + "step": 26550 + }, + { + "epoch": 3.504685231622014, + "grad_norm": 0.0012614666484296322, + "learning_rate": 4.972628693174802e-07, + "loss": 0.0032, + "num_input_tokens_seen": 13081792, + "step": 26555 + }, + { + "epoch": 3.5053451233997626, + "grad_norm": 0.002648318186402321, + "learning_rate": 4.96864677098359e-07, + "loss": 0.0, + "num_input_tokens_seen": 13084352, + "step": 26560 + }, + { + "epoch": 3.506005015177511, + "grad_norm": 0.00365750421769917, + "learning_rate": 4.964665916660671e-07, + "loss": 0.0611, + "num_input_tokens_seen": 13086784, + "step": 26565 + }, + { + "epoch": 3.506664906955259, + "grad_norm": 0.10935722291469574, + "learning_rate": 4.960686131050945e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13089344, + "step": 26570 + }, + { + "epoch": 3.507324798733008, + "grad_norm": 0.006860377267003059, + "learning_rate": 4.956707414999095e-07, + "loss": 0.0, + "num_input_tokens_seen": 13091712, + "step": 26575 + }, + { + "epoch": 3.507984690510756, + "grad_norm": 0.0029719332233071327, + "learning_rate": 4.95272976934958e-07, + "loss": 0.0, + "num_input_tokens_seen": 13094208, + "step": 26580 + }, + { + "epoch": 3.508644582288505, + "grad_norm": 18.196840286254883, + "learning_rate": 4.948753194946617e-07, + "loss": 0.0759, + "num_input_tokens_seen": 13096768, + "step": 26585 + }, + { + "epoch": 3.509304474066253, + "grad_norm": 0.0007499216008000076, + "learning_rate": 4.944777692634211e-07, + "loss": 0.0011, + "num_input_tokens_seen": 13099264, + "step": 26590 + }, + { + "epoch": 3.5099643658440014, + "grad_norm": 0.0002792077139019966, + "learning_rate": 4.940803263256133e-07, + "loss": 0.0011, + "num_input_tokens_seen": 13101952, + "step": 26595 + }, + { + "epoch": 3.51062425762175, + "grad_norm": 0.0011871742317453027, + "learning_rate": 4.936829907655929e-07, + "loss": 0.0, + "num_input_tokens_seen": 13104704, + "step": 26600 + }, + { + "epoch": 3.5112841493994984, + "grad_norm": 0.0007210766198113561, + "learning_rate": 4.932857626676914e-07, + "loss": 0.0, + "num_input_tokens_seen": 13107072, + "step": 26605 + }, + { + "epoch": 3.511944041177247, + "grad_norm": 0.00100160192232579, + "learning_rate": 4.928886421162166e-07, + "loss": 0.0846, + "num_input_tokens_seen": 13109376, + "step": 26610 + }, + { + "epoch": 3.5126039329549954, + "grad_norm": 0.019927017390727997, + "learning_rate": 4.924916291954561e-07, + "loss": 0.0518, + "num_input_tokens_seen": 13111936, + "step": 26615 + }, + { + "epoch": 3.5132638247327437, + "grad_norm": 0.01956903748214245, + "learning_rate": 4.920947239896717e-07, + "loss": 0.0, + "num_input_tokens_seen": 13114432, + "step": 26620 + }, + { + "epoch": 3.5139237165104924, + "grad_norm": 0.007326959166675806, + "learning_rate": 4.916979265831043e-07, + "loss": 0.0, + "num_input_tokens_seen": 13116800, + "step": 26625 + }, + { + "epoch": 3.5145836082882407, + "grad_norm": 0.0005818351055495441, + "learning_rate": 4.913012370599715e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13119296, + "step": 26630 + }, + { + "epoch": 3.5152435000659894, + "grad_norm": 0.06792768090963364, + "learning_rate": 4.909046555044672e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13121664, + "step": 26635 + }, + { + "epoch": 3.5159033918437377, + "grad_norm": 24.14913558959961, + "learning_rate": 4.905081820007634e-07, + "loss": 0.0035, + "num_input_tokens_seen": 13124288, + "step": 26640 + }, + { + "epoch": 3.516563283621486, + "grad_norm": 0.002926342422142625, + "learning_rate": 4.901118166330077e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13126784, + "step": 26645 + }, + { + "epoch": 3.5172231753992347, + "grad_norm": 0.282175749540329, + "learning_rate": 4.897155594853275e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13129216, + "step": 26650 + }, + { + "epoch": 3.517883067176983, + "grad_norm": 0.006466337013989687, + "learning_rate": 4.893194106418246e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13131456, + "step": 26655 + }, + { + "epoch": 3.5185429589547317, + "grad_norm": 0.004010173957794905, + "learning_rate": 4.889233701865782e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13133824, + "step": 26660 + }, + { + "epoch": 3.51920285073248, + "grad_norm": 0.013327274471521378, + "learning_rate": 4.885274382036457e-07, + "loss": 0.0104, + "num_input_tokens_seen": 13136256, + "step": 26665 + }, + { + "epoch": 3.5198627425102282, + "grad_norm": 0.0012044442119076848, + "learning_rate": 4.881316147770607e-07, + "loss": 0.0014, + "num_input_tokens_seen": 13138944, + "step": 26670 + }, + { + "epoch": 3.5205226342879765, + "grad_norm": 0.0015769976889714599, + "learning_rate": 4.877358999908339e-07, + "loss": 0.0, + "num_input_tokens_seen": 13141504, + "step": 26675 + }, + { + "epoch": 3.5211825260657252, + "grad_norm": 0.10278157144784927, + "learning_rate": 4.873402939289527e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13143552, + "step": 26680 + }, + { + "epoch": 3.5218424178434735, + "grad_norm": 0.00024413972278125584, + "learning_rate": 4.869447966753816e-07, + "loss": 0.0, + "num_input_tokens_seen": 13146048, + "step": 26685 + }, + { + "epoch": 3.5225023096212222, + "grad_norm": 0.00010406988440081477, + "learning_rate": 4.865494083140627e-07, + "loss": 0.0, + "num_input_tokens_seen": 13148288, + "step": 26690 + }, + { + "epoch": 3.5231622013989705, + "grad_norm": 0.07188454270362854, + "learning_rate": 4.861541289289131e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13150720, + "step": 26695 + }, + { + "epoch": 3.523822093176719, + "grad_norm": 8.29110576887615e-05, + "learning_rate": 4.857589586038289e-07, + "loss": 0.0381, + "num_input_tokens_seen": 13153344, + "step": 26700 + }, + { + "epoch": 3.5244819849544675, + "grad_norm": 128.06494140625, + "learning_rate": 4.853638974226822e-07, + "loss": 0.0044, + "num_input_tokens_seen": 13155840, + "step": 26705 + }, + { + "epoch": 3.525141876732216, + "grad_norm": 0.4814980626106262, + "learning_rate": 4.849689454693212e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13158272, + "step": 26710 + }, + { + "epoch": 3.5258017685099645, + "grad_norm": 0.003013887209817767, + "learning_rate": 4.845741028275719e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13160640, + "step": 26715 + }, + { + "epoch": 3.526461660287713, + "grad_norm": 0.008023404516279697, + "learning_rate": 4.841793695812369e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13163008, + "step": 26720 + }, + { + "epoch": 3.527121552065461, + "grad_norm": 0.0011691105319187045, + "learning_rate": 4.837847458140959e-07, + "loss": 0.075, + "num_input_tokens_seen": 13165440, + "step": 26725 + }, + { + "epoch": 3.52778144384321, + "grad_norm": 0.0500674843788147, + "learning_rate": 4.833902316099039e-07, + "loss": 0.0, + "num_input_tokens_seen": 13167680, + "step": 26730 + }, + { + "epoch": 3.528441335620958, + "grad_norm": 0.0005932372878305614, + "learning_rate": 4.829958270523944e-07, + "loss": 0.0, + "num_input_tokens_seen": 13169728, + "step": 26735 + }, + { + "epoch": 3.529101227398707, + "grad_norm": 0.012722421437501907, + "learning_rate": 4.82601532225277e-07, + "loss": 0.0626, + "num_input_tokens_seen": 13172096, + "step": 26740 + }, + { + "epoch": 3.529761119176455, + "grad_norm": 0.013261332176625729, + "learning_rate": 4.822073472122374e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13174528, + "step": 26745 + }, + { + "epoch": 3.5304210109542034, + "grad_norm": 0.00034718215465545654, + "learning_rate": 4.818132720969387e-07, + "loss": 0.0, + "num_input_tokens_seen": 13176960, + "step": 26750 + }, + { + "epoch": 3.531080902731952, + "grad_norm": 5.128545761108398, + "learning_rate": 4.814193069630211e-07, + "loss": 0.002, + "num_input_tokens_seen": 13179328, + "step": 26755 + }, + { + "epoch": 3.5317407945097004, + "grad_norm": 0.00042824147385545075, + "learning_rate": 4.810254518941e-07, + "loss": 0.0, + "num_input_tokens_seen": 13181824, + "step": 26760 + }, + { + "epoch": 3.532400686287449, + "grad_norm": 0.035186175256967545, + "learning_rate": 4.806317069737684e-07, + "loss": 0.0, + "num_input_tokens_seen": 13184256, + "step": 26765 + }, + { + "epoch": 3.5330605780651974, + "grad_norm": 0.0006575493607670069, + "learning_rate": 4.802380722855961e-07, + "loss": 0.0, + "num_input_tokens_seen": 13186560, + "step": 26770 + }, + { + "epoch": 3.5337204698429456, + "grad_norm": 0.0046204268001019955, + "learning_rate": 4.798445479131295e-07, + "loss": 0.0, + "num_input_tokens_seen": 13189120, + "step": 26775 + }, + { + "epoch": 3.5343803616206944, + "grad_norm": 0.0010483438381925225, + "learning_rate": 4.794511339398911e-07, + "loss": 0.0, + "num_input_tokens_seen": 13191552, + "step": 26780 + }, + { + "epoch": 3.5350402533984426, + "grad_norm": 0.0001779908052412793, + "learning_rate": 4.790578304493791e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13193856, + "step": 26785 + }, + { + "epoch": 3.5357001451761914, + "grad_norm": 0.0018780305981636047, + "learning_rate": 4.786646375250711e-07, + "loss": 0.0891, + "num_input_tokens_seen": 13196288, + "step": 26790 + }, + { + "epoch": 3.5363600369539396, + "grad_norm": 0.0004961126251146197, + "learning_rate": 4.78271555250418e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13198720, + "step": 26795 + }, + { + "epoch": 3.537019928731688, + "grad_norm": 0.0003020392032340169, + "learning_rate": 4.778785837088497e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13201152, + "step": 26800 + }, + { + "epoch": 3.537679820509436, + "grad_norm": 0.0001238805562024936, + "learning_rate": 4.774857229837708e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13203584, + "step": 26805 + }, + { + "epoch": 3.538339712287185, + "grad_norm": 0.0013724665623158216, + "learning_rate": 4.770929731585634e-07, + "loss": 0.0, + "num_input_tokens_seen": 13206016, + "step": 26810 + }, + { + "epoch": 3.538999604064933, + "grad_norm": 39.34675216674805, + "learning_rate": 4.7670033431658605e-07, + "loss": 0.0797, + "num_input_tokens_seen": 13208256, + "step": 26815 + }, + { + "epoch": 3.539659495842682, + "grad_norm": 0.0005967853940092027, + "learning_rate": 4.7630780654117273e-07, + "loss": 0.0, + "num_input_tokens_seen": 13210880, + "step": 26820 + }, + { + "epoch": 3.54031938762043, + "grad_norm": 0.0013899434125050902, + "learning_rate": 4.7591538991563594e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13213248, + "step": 26825 + }, + { + "epoch": 3.5409792793981785, + "grad_norm": 0.00026954489294439554, + "learning_rate": 4.755230845232625e-07, + "loss": 0.0, + "num_input_tokens_seen": 13215616, + "step": 26830 + }, + { + "epoch": 3.541639171175927, + "grad_norm": 0.00046751563786529005, + "learning_rate": 4.7513089044731603e-07, + "loss": 0.0213, + "num_input_tokens_seen": 13217920, + "step": 26835 + }, + { + "epoch": 3.5422990629536755, + "grad_norm": 4.4770134991267696e-05, + "learning_rate": 4.7473880777103725e-07, + "loss": 0.0, + "num_input_tokens_seen": 13220288, + "step": 26840 + }, + { + "epoch": 3.542958954731424, + "grad_norm": 0.0011170512298122048, + "learning_rate": 4.74346836577643e-07, + "loss": 0.0487, + "num_input_tokens_seen": 13223040, + "step": 26845 + }, + { + "epoch": 3.5436188465091725, + "grad_norm": 0.1462031602859497, + "learning_rate": 4.7395497695032637e-07, + "loss": 0.0017, + "num_input_tokens_seen": 13225152, + "step": 26850 + }, + { + "epoch": 3.5442787382869207, + "grad_norm": 0.004078585188835859, + "learning_rate": 4.735632289722563e-07, + "loss": 0.0, + "num_input_tokens_seen": 13227648, + "step": 26855 + }, + { + "epoch": 3.5449386300646695, + "grad_norm": 0.00015965760394465178, + "learning_rate": 4.731715927265787e-07, + "loss": 0.0, + "num_input_tokens_seen": 13230080, + "step": 26860 + }, + { + "epoch": 3.5455985218424177, + "grad_norm": 133.80470275878906, + "learning_rate": 4.727800682964159e-07, + "loss": 0.1657, + "num_input_tokens_seen": 13232768, + "step": 26865 + }, + { + "epoch": 3.5462584136201665, + "grad_norm": 0.006459526252001524, + "learning_rate": 4.723886557648655e-07, + "loss": 0.0, + "num_input_tokens_seen": 13235008, + "step": 26870 + }, + { + "epoch": 3.5469183053979148, + "grad_norm": 0.025274749845266342, + "learning_rate": 4.719973552150022e-07, + "loss": 0.0839, + "num_input_tokens_seen": 13237696, + "step": 26875 + }, + { + "epoch": 3.547578197175663, + "grad_norm": 0.004373045172542334, + "learning_rate": 4.7160616672987674e-07, + "loss": 0.0, + "num_input_tokens_seen": 13240192, + "step": 26880 + }, + { + "epoch": 3.5482380889534118, + "grad_norm": 0.0033362486865371466, + "learning_rate": 4.712150903925165e-07, + "loss": 0.0, + "num_input_tokens_seen": 13242496, + "step": 26885 + }, + { + "epoch": 3.54889798073116, + "grad_norm": 0.001436105347238481, + "learning_rate": 4.708241262859237e-07, + "loss": 0.0, + "num_input_tokens_seen": 13244864, + "step": 26890 + }, + { + "epoch": 3.5495578725089088, + "grad_norm": 0.0019471053965389729, + "learning_rate": 4.7043327449307813e-07, + "loss": 0.0, + "num_input_tokens_seen": 13247104, + "step": 26895 + }, + { + "epoch": 3.550217764286657, + "grad_norm": 0.0009903459576889873, + "learning_rate": 4.700425350969357e-07, + "loss": 0.0, + "num_input_tokens_seen": 13249536, + "step": 26900 + }, + { + "epoch": 3.5508776560644053, + "grad_norm": 1.5322378873825073, + "learning_rate": 4.696519081804271e-07, + "loss": 0.001, + "num_input_tokens_seen": 13251904, + "step": 26905 + }, + { + "epoch": 3.551537547842154, + "grad_norm": 0.0005009761080145836, + "learning_rate": 4.6926139382646045e-07, + "loss": 0.0, + "num_input_tokens_seen": 13254336, + "step": 26910 + }, + { + "epoch": 3.5521974396199023, + "grad_norm": 0.02354281395673752, + "learning_rate": 4.6887099211792016e-07, + "loss": 0.0, + "num_input_tokens_seen": 13257088, + "step": 26915 + }, + { + "epoch": 3.552857331397651, + "grad_norm": 0.003061910392716527, + "learning_rate": 4.6848070313766507e-07, + "loss": 0.0427, + "num_input_tokens_seen": 13259584, + "step": 26920 + }, + { + "epoch": 3.5535172231753993, + "grad_norm": 0.0007698460831306875, + "learning_rate": 4.68090526968532e-07, + "loss": 0.0, + "num_input_tokens_seen": 13262208, + "step": 26925 + }, + { + "epoch": 3.5541771149531476, + "grad_norm": 0.004608421120792627, + "learning_rate": 4.677004636933327e-07, + "loss": 0.052, + "num_input_tokens_seen": 13264704, + "step": 26930 + }, + { + "epoch": 3.554837006730896, + "grad_norm": 0.06963248550891876, + "learning_rate": 4.673105133948557e-07, + "loss": 0.0, + "num_input_tokens_seen": 13266816, + "step": 26935 + }, + { + "epoch": 3.5554968985086446, + "grad_norm": 0.00527073023840785, + "learning_rate": 4.6692067615586493e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13269120, + "step": 26940 + }, + { + "epoch": 3.556156790286393, + "grad_norm": 0.02549833059310913, + "learning_rate": 4.6653095205909955e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13271232, + "step": 26945 + }, + { + "epoch": 3.5568166820641416, + "grad_norm": 0.009850227274000645, + "learning_rate": 4.661413411872772e-07, + "loss": 0.0, + "num_input_tokens_seen": 13273536, + "step": 26950 + }, + { + "epoch": 3.55747657384189, + "grad_norm": 3.481121778488159, + "learning_rate": 4.6575184362308904e-07, + "loss": 0.0281, + "num_input_tokens_seen": 13276160, + "step": 26955 + }, + { + "epoch": 3.558136465619638, + "grad_norm": 0.0004499904753174633, + "learning_rate": 4.653624594492033e-07, + "loss": 0.0, + "num_input_tokens_seen": 13278336, + "step": 26960 + }, + { + "epoch": 3.558796357397387, + "grad_norm": 0.3562501072883606, + "learning_rate": 4.649731887482644e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13280832, + "step": 26965 + }, + { + "epoch": 3.559456249175135, + "grad_norm": 0.01617368683218956, + "learning_rate": 4.645840316028914e-07, + "loss": 0.0, + "num_input_tokens_seen": 13283328, + "step": 26970 + }, + { + "epoch": 3.560116140952884, + "grad_norm": 0.0003085601201746613, + "learning_rate": 4.641949880956809e-07, + "loss": 0.002, + "num_input_tokens_seen": 13285632, + "step": 26975 + }, + { + "epoch": 3.560776032730632, + "grad_norm": 0.008649193681776524, + "learning_rate": 4.638060583092035e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13287872, + "step": 26980 + }, + { + "epoch": 3.5614359245083804, + "grad_norm": 0.17648448050022125, + "learning_rate": 4.634172423260081e-07, + "loss": 0.069, + "num_input_tokens_seen": 13290560, + "step": 26985 + }, + { + "epoch": 3.562095816286129, + "grad_norm": 0.0014605034375563264, + "learning_rate": 4.6302854022861735e-07, + "loss": 0.0322, + "num_input_tokens_seen": 13293056, + "step": 26990 + }, + { + "epoch": 3.5627557080638774, + "grad_norm": 0.0008046274306252599, + "learning_rate": 4.6263995209953024e-07, + "loss": 0.0, + "num_input_tokens_seen": 13295488, + "step": 26995 + }, + { + "epoch": 3.563415599841626, + "grad_norm": 0.0001721447624731809, + "learning_rate": 4.622514780212219e-07, + "loss": 0.0, + "num_input_tokens_seen": 13297856, + "step": 27000 + }, + { + "epoch": 3.5640754916193744, + "grad_norm": 0.009151825681328773, + "learning_rate": 4.618631180761434e-07, + "loss": 0.0322, + "num_input_tokens_seen": 13300416, + "step": 27005 + }, + { + "epoch": 3.5647353833971227, + "grad_norm": 0.000537705491296947, + "learning_rate": 4.6147487234672156e-07, + "loss": 0.0, + "num_input_tokens_seen": 13302848, + "step": 27010 + }, + { + "epoch": 3.5653952751748714, + "grad_norm": 0.0005098577821627259, + "learning_rate": 4.6108674091535795e-07, + "loss": 0.0, + "num_input_tokens_seen": 13305344, + "step": 27015 + }, + { + "epoch": 3.5660551669526197, + "grad_norm": 0.0005886783474124968, + "learning_rate": 4.6069872386443107e-07, + "loss": 0.0, + "num_input_tokens_seen": 13307840, + "step": 27020 + }, + { + "epoch": 3.5667150587303684, + "grad_norm": 0.000730705913156271, + "learning_rate": 4.6031082127629514e-07, + "loss": 0.0323, + "num_input_tokens_seen": 13310208, + "step": 27025 + }, + { + "epoch": 3.5673749505081167, + "grad_norm": 0.01606333628296852, + "learning_rate": 4.5992303323327885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13312576, + "step": 27030 + }, + { + "epoch": 3.568034842285865, + "grad_norm": 0.006714434828609228, + "learning_rate": 4.5953535981768786e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13314752, + "step": 27035 + }, + { + "epoch": 3.5686947340636137, + "grad_norm": 0.0010593448532745242, + "learning_rate": 4.591478011118034e-07, + "loss": 0.0, + "num_input_tokens_seen": 13317184, + "step": 27040 + }, + { + "epoch": 3.569354625841362, + "grad_norm": 0.00020904975826852024, + "learning_rate": 4.5876035719788133e-07, + "loss": 0.0585, + "num_input_tokens_seen": 13320128, + "step": 27045 + }, + { + "epoch": 3.5700145176191107, + "grad_norm": 0.000504173047374934, + "learning_rate": 4.5837302815815394e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13322816, + "step": 27050 + }, + { + "epoch": 3.570674409396859, + "grad_norm": 9.949973173206672e-05, + "learning_rate": 4.5798581407482927e-07, + "loss": 0.0, + "num_input_tokens_seen": 13325248, + "step": 27055 + }, + { + "epoch": 3.5713343011746073, + "grad_norm": 0.0005612291861325502, + "learning_rate": 4.5759871503009097e-07, + "loss": 0.0693, + "num_input_tokens_seen": 13327680, + "step": 27060 + }, + { + "epoch": 3.5719941929523555, + "grad_norm": 0.03958077356219292, + "learning_rate": 4.572117311060972e-07, + "loss": 0.0, + "num_input_tokens_seen": 13329984, + "step": 27065 + }, + { + "epoch": 3.5726540847301043, + "grad_norm": 0.0063445377163589, + "learning_rate": 4.56824862384983e-07, + "loss": 0.0719, + "num_input_tokens_seen": 13332288, + "step": 27070 + }, + { + "epoch": 3.573313976507853, + "grad_norm": 0.0007765923510305583, + "learning_rate": 4.564381089488587e-07, + "loss": 0.0176, + "num_input_tokens_seen": 13334400, + "step": 27075 + }, + { + "epoch": 3.5739738682856013, + "grad_norm": 0.0009005771134980023, + "learning_rate": 4.560514708798093e-07, + "loss": 0.1063, + "num_input_tokens_seen": 13337024, + "step": 27080 + }, + { + "epoch": 3.5746337600633495, + "grad_norm": 0.025038283318281174, + "learning_rate": 4.556649482598962e-07, + "loss": 0.0, + "num_input_tokens_seen": 13339328, + "step": 27085 + }, + { + "epoch": 3.575293651841098, + "grad_norm": 0.0005765855894424021, + "learning_rate": 4.552785411711565e-07, + "loss": 0.0412, + "num_input_tokens_seen": 13341632, + "step": 27090 + }, + { + "epoch": 3.5759535436188465, + "grad_norm": 3.052090883255005, + "learning_rate": 4.548922496956015e-07, + "loss": 0.0011, + "num_input_tokens_seen": 13343936, + "step": 27095 + }, + { + "epoch": 3.576613435396595, + "grad_norm": 0.0007220639381557703, + "learning_rate": 4.54506073915219e-07, + "loss": 0.0, + "num_input_tokens_seen": 13346688, + "step": 27100 + }, + { + "epoch": 3.5772733271743435, + "grad_norm": 0.0006462151068262756, + "learning_rate": 4.541200139119723e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13349376, + "step": 27105 + }, + { + "epoch": 3.577933218952092, + "grad_norm": 0.009303468279540539, + "learning_rate": 4.537340697678e-07, + "loss": 0.0673, + "num_input_tokens_seen": 13351680, + "step": 27110 + }, + { + "epoch": 3.57859311072984, + "grad_norm": 0.001099230838008225, + "learning_rate": 4.533482415646157e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13354048, + "step": 27115 + }, + { + "epoch": 3.579253002507589, + "grad_norm": 11.371760368347168, + "learning_rate": 4.529625293843078e-07, + "loss": 0.004, + "num_input_tokens_seen": 13356416, + "step": 27120 + }, + { + "epoch": 3.579912894285337, + "grad_norm": 0.012447909452021122, + "learning_rate": 4.525769333087425e-07, + "loss": 0.0, + "num_input_tokens_seen": 13358592, + "step": 27125 + }, + { + "epoch": 3.580572786063086, + "grad_norm": 0.0015077232383191586, + "learning_rate": 4.521914534197585e-07, + "loss": 0.0, + "num_input_tokens_seen": 13361216, + "step": 27130 + }, + { + "epoch": 3.581232677840834, + "grad_norm": 0.0013660573167726398, + "learning_rate": 4.518060897991721e-07, + "loss": 0.0, + "num_input_tokens_seen": 13363392, + "step": 27135 + }, + { + "epoch": 3.5818925696185824, + "grad_norm": 0.003877345472574234, + "learning_rate": 4.51420842528773e-07, + "loss": 0.0, + "num_input_tokens_seen": 13365760, + "step": 27140 + }, + { + "epoch": 3.582552461396331, + "grad_norm": 0.0003621858195401728, + "learning_rate": 4.510357116903275e-07, + "loss": 0.0, + "num_input_tokens_seen": 13368000, + "step": 27145 + }, + { + "epoch": 3.5832123531740794, + "grad_norm": 0.00024860017583705485, + "learning_rate": 4.5065069736557737e-07, + "loss": 0.0, + "num_input_tokens_seen": 13370368, + "step": 27150 + }, + { + "epoch": 3.583872244951828, + "grad_norm": 0.0033539736177772284, + "learning_rate": 4.502657996362379e-07, + "loss": 0.0, + "num_input_tokens_seen": 13373248, + "step": 27155 + }, + { + "epoch": 3.5845321367295764, + "grad_norm": 1.584349513053894, + "learning_rate": 4.498810185840023e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13375488, + "step": 27160 + }, + { + "epoch": 3.5851920285073247, + "grad_norm": 1.3402936458587646, + "learning_rate": 4.494963542905369e-07, + "loss": 0.0873, + "num_input_tokens_seen": 13377856, + "step": 27165 + }, + { + "epoch": 3.5858519202850734, + "grad_norm": 0.013916954398155212, + "learning_rate": 4.491118068374835e-07, + "loss": 0.0007, + "num_input_tokens_seen": 13380544, + "step": 27170 + }, + { + "epoch": 3.5865118120628217, + "grad_norm": 0.0025454284623265266, + "learning_rate": 4.4872737630645984e-07, + "loss": 0.0, + "num_input_tokens_seen": 13382912, + "step": 27175 + }, + { + "epoch": 3.5871717038405704, + "grad_norm": 0.0019730727653950453, + "learning_rate": 4.4834306277905855e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13385152, + "step": 27180 + }, + { + "epoch": 3.5878315956183187, + "grad_norm": 0.0019074628362432122, + "learning_rate": 4.4795886633684776e-07, + "loss": 0.0, + "num_input_tokens_seen": 13387392, + "step": 27185 + }, + { + "epoch": 3.588491487396067, + "grad_norm": 0.06505951285362244, + "learning_rate": 4.4757478706136974e-07, + "loss": 0.0472, + "num_input_tokens_seen": 13389696, + "step": 27190 + }, + { + "epoch": 3.5891513791738157, + "grad_norm": 0.006583169102668762, + "learning_rate": 4.4719082503414273e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13391872, + "step": 27195 + }, + { + "epoch": 3.589811270951564, + "grad_norm": 0.00811641477048397, + "learning_rate": 4.468069803366604e-07, + "loss": 0.0, + "num_input_tokens_seen": 13394048, + "step": 27200 + }, + { + "epoch": 3.5904711627293127, + "grad_norm": 0.0005078451940789819, + "learning_rate": 4.464232530503902e-07, + "loss": 0.0, + "num_input_tokens_seen": 13396672, + "step": 27205 + }, + { + "epoch": 3.591131054507061, + "grad_norm": 27.991722106933594, + "learning_rate": 4.460396432567759e-07, + "loss": 0.1157, + "num_input_tokens_seen": 13399232, + "step": 27210 + }, + { + "epoch": 3.591790946284809, + "grad_norm": 0.0011901069665327668, + "learning_rate": 4.456561510372358e-07, + "loss": 0.0, + "num_input_tokens_seen": 13401600, + "step": 27215 + }, + { + "epoch": 3.5924508380625575, + "grad_norm": 0.008773591369390488, + "learning_rate": 4.4527277647316375e-07, + "loss": 0.0, + "num_input_tokens_seen": 13404160, + "step": 27220 + }, + { + "epoch": 3.593110729840306, + "grad_norm": 0.0010363340843468904, + "learning_rate": 4.448895196459275e-07, + "loss": 0.0016, + "num_input_tokens_seen": 13406592, + "step": 27225 + }, + { + "epoch": 3.5937706216180545, + "grad_norm": 0.9809166789054871, + "learning_rate": 4.4450638063687094e-07, + "loss": 0.0012, + "num_input_tokens_seen": 13409152, + "step": 27230 + }, + { + "epoch": 3.594430513395803, + "grad_norm": 0.12178061902523041, + "learning_rate": 4.4412335952731284e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13411776, + "step": 27235 + }, + { + "epoch": 3.5950904051735515, + "grad_norm": 0.0018833117792382836, + "learning_rate": 4.437404563985461e-07, + "loss": 0.0009, + "num_input_tokens_seen": 13414272, + "step": 27240 + }, + { + "epoch": 3.5957502969512998, + "grad_norm": 17.3830623626709, + "learning_rate": 4.4335767133183923e-07, + "loss": 0.0169, + "num_input_tokens_seen": 13416832, + "step": 27245 + }, + { + "epoch": 3.5964101887290485, + "grad_norm": 40.81116485595703, + "learning_rate": 4.4297500440843616e-07, + "loss": 0.075, + "num_input_tokens_seen": 13419136, + "step": 27250 + }, + { + "epoch": 3.5970700805067968, + "grad_norm": 0.001731569180265069, + "learning_rate": 4.4259245570955437e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13421632, + "step": 27255 + }, + { + "epoch": 3.5977299722845455, + "grad_norm": 0.002258374122902751, + "learning_rate": 4.422100253163874e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13424000, + "step": 27260 + }, + { + "epoch": 3.5983898640622938, + "grad_norm": 0.0061827958561480045, + "learning_rate": 4.4182771331010347e-07, + "loss": 0.0337, + "num_input_tokens_seen": 13426368, + "step": 27265 + }, + { + "epoch": 3.599049755840042, + "grad_norm": 5.052387237548828, + "learning_rate": 4.414455197718457e-07, + "loss": 0.002, + "num_input_tokens_seen": 13428608, + "step": 27270 + }, + { + "epoch": 3.5997096476177908, + "grad_norm": 0.6098920702934265, + "learning_rate": 4.410634447827316e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13430848, + "step": 27275 + }, + { + "epoch": 3.600369539395539, + "grad_norm": 0.00019582083041314036, + "learning_rate": 4.406814884238532e-07, + "loss": 0.1103, + "num_input_tokens_seen": 13433280, + "step": 27280 + }, + { + "epoch": 3.6010294311732878, + "grad_norm": 0.08573462069034576, + "learning_rate": 4.4029965077627927e-07, + "loss": 0.0383, + "num_input_tokens_seen": 13435584, + "step": 27285 + }, + { + "epoch": 3.601689322951036, + "grad_norm": 0.006321965716779232, + "learning_rate": 4.399179319210511e-07, + "loss": 0.0, + "num_input_tokens_seen": 13438080, + "step": 27290 + }, + { + "epoch": 3.6023492147287843, + "grad_norm": 0.0017303403001278639, + "learning_rate": 4.3953633193918606e-07, + "loss": 0.0, + "num_input_tokens_seen": 13440832, + "step": 27295 + }, + { + "epoch": 3.603009106506533, + "grad_norm": 20.29780387878418, + "learning_rate": 4.3915485091167647e-07, + "loss": 0.1113, + "num_input_tokens_seen": 13443520, + "step": 27300 + }, + { + "epoch": 3.6036689982842813, + "grad_norm": 0.0009621708304621279, + "learning_rate": 4.3877348891948794e-07, + "loss": 0.0, + "num_input_tokens_seen": 13445824, + "step": 27305 + }, + { + "epoch": 3.60432889006203, + "grad_norm": 0.0025498121976852417, + "learning_rate": 4.3839224604356274e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13448512, + "step": 27310 + }, + { + "epoch": 3.6049887818397783, + "grad_norm": 0.00046822839067317545, + "learning_rate": 4.3801112236481575e-07, + "loss": 0.0, + "num_input_tokens_seen": 13450944, + "step": 27315 + }, + { + "epoch": 3.6056486736175266, + "grad_norm": 0.003700240980833769, + "learning_rate": 4.3763011796413915e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13453376, + "step": 27320 + }, + { + "epoch": 3.6063085653952753, + "grad_norm": 0.052340708673000336, + "learning_rate": 4.372492329223977e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13455936, + "step": 27325 + }, + { + "epoch": 3.6069684571730236, + "grad_norm": 0.023182492703199387, + "learning_rate": 4.3686846732043105e-07, + "loss": 0.0281, + "num_input_tokens_seen": 13458560, + "step": 27330 + }, + { + "epoch": 3.6076283489507723, + "grad_norm": 0.08036337792873383, + "learning_rate": 4.3648782123905424e-07, + "loss": 0.0626, + "num_input_tokens_seen": 13460864, + "step": 27335 + }, + { + "epoch": 3.6082882407285206, + "grad_norm": 0.01171074528247118, + "learning_rate": 4.361072947590568e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13463360, + "step": 27340 + }, + { + "epoch": 3.608948132506269, + "grad_norm": 0.0003683822287712246, + "learning_rate": 4.3572688796120307e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13466112, + "step": 27345 + }, + { + "epoch": 3.609608024284017, + "grad_norm": 1.8278930187225342, + "learning_rate": 4.353466009262309e-07, + "loss": 0.001, + "num_input_tokens_seen": 13468800, + "step": 27350 + }, + { + "epoch": 3.610267916061766, + "grad_norm": 0.0013008704409003258, + "learning_rate": 4.3496643373485367e-07, + "loss": 0.0226, + "num_input_tokens_seen": 13471296, + "step": 27355 + }, + { + "epoch": 3.610927807839514, + "grad_norm": 0.0005728037795051932, + "learning_rate": 4.345863864677596e-07, + "loss": 0.0, + "num_input_tokens_seen": 13473728, + "step": 27360 + }, + { + "epoch": 3.611587699617263, + "grad_norm": 0.1258080005645752, + "learning_rate": 4.342064592056103e-07, + "loss": 0.0009, + "num_input_tokens_seen": 13476032, + "step": 27365 + }, + { + "epoch": 3.612247591395011, + "grad_norm": 0.0041159396059811115, + "learning_rate": 4.338266520290428e-07, + "loss": 0.0, + "num_input_tokens_seen": 13478592, + "step": 27370 + }, + { + "epoch": 3.6129074831727594, + "grad_norm": 25.62404441833496, + "learning_rate": 4.3344696501866893e-07, + "loss": 0.0688, + "num_input_tokens_seen": 13481088, + "step": 27375 + }, + { + "epoch": 3.613567374950508, + "grad_norm": 0.0026808977127075195, + "learning_rate": 4.330673982550738e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13483328, + "step": 27380 + }, + { + "epoch": 3.6142272667282564, + "grad_norm": 0.0003970061952713877, + "learning_rate": 4.326879518188178e-07, + "loss": 0.0, + "num_input_tokens_seen": 13485888, + "step": 27385 + }, + { + "epoch": 3.614887158506005, + "grad_norm": 0.003292738925665617, + "learning_rate": 4.323086257904359e-07, + "loss": 0.0, + "num_input_tokens_seen": 13488512, + "step": 27390 + }, + { + "epoch": 3.6155470502837534, + "grad_norm": 0.0002803392708301544, + "learning_rate": 4.319294202504378e-07, + "loss": 0.0, + "num_input_tokens_seen": 13490688, + "step": 27395 + }, + { + "epoch": 3.6162069420615017, + "grad_norm": 0.0007644235738553107, + "learning_rate": 4.3155033527930606e-07, + "loss": 0.0, + "num_input_tokens_seen": 13492992, + "step": 27400 + }, + { + "epoch": 3.6168668338392505, + "grad_norm": 0.16152521967887878, + "learning_rate": 4.3117137095749945e-07, + "loss": 0.0201, + "num_input_tokens_seen": 13495360, + "step": 27405 + }, + { + "epoch": 3.6175267256169987, + "grad_norm": 1.5119383335113525, + "learning_rate": 4.307925273654505e-07, + "loss": 0.0018, + "num_input_tokens_seen": 13497792, + "step": 27410 + }, + { + "epoch": 3.6181866173947475, + "grad_norm": 5.329381019691937e-05, + "learning_rate": 4.3041380458356534e-07, + "loss": 0.0, + "num_input_tokens_seen": 13500224, + "step": 27415 + }, + { + "epoch": 3.6188465091724957, + "grad_norm": 0.00558890588581562, + "learning_rate": 4.3003520269222557e-07, + "loss": 0.0, + "num_input_tokens_seen": 13502400, + "step": 27420 + }, + { + "epoch": 3.619506400950244, + "grad_norm": 0.009107373654842377, + "learning_rate": 4.29656721771787e-07, + "loss": 0.0533, + "num_input_tokens_seen": 13505216, + "step": 27425 + }, + { + "epoch": 3.6201662927279927, + "grad_norm": 0.00024875879171304405, + "learning_rate": 4.292783619025788e-07, + "loss": 0.0549, + "num_input_tokens_seen": 13507520, + "step": 27430 + }, + { + "epoch": 3.620826184505741, + "grad_norm": 0.019647786393761635, + "learning_rate": 4.289001231649054e-07, + "loss": 0.0176, + "num_input_tokens_seen": 13510144, + "step": 27435 + }, + { + "epoch": 3.6214860762834897, + "grad_norm": 0.09665568917989731, + "learning_rate": 4.285220056390454e-07, + "loss": 0.0595, + "num_input_tokens_seen": 13512640, + "step": 27440 + }, + { + "epoch": 3.622145968061238, + "grad_norm": 0.0013785201590508223, + "learning_rate": 4.2814400940525164e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13515200, + "step": 27445 + }, + { + "epoch": 3.6228058598389863, + "grad_norm": 0.008707517758011818, + "learning_rate": 4.2776613454375087e-07, + "loss": 0.0, + "num_input_tokens_seen": 13517568, + "step": 27450 + }, + { + "epoch": 3.623465751616735, + "grad_norm": 0.02214517630636692, + "learning_rate": 4.2738838113474353e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13520064, + "step": 27455 + }, + { + "epoch": 3.6241256433944833, + "grad_norm": 0.008972934447228909, + "learning_rate": 4.2701074925840643e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13522688, + "step": 27460 + }, + { + "epoch": 3.624785535172232, + "grad_norm": 0.005197230726480484, + "learning_rate": 4.266332389948882e-07, + "loss": 0.0, + "num_input_tokens_seen": 13525376, + "step": 27465 + }, + { + "epoch": 3.6254454269499803, + "grad_norm": 0.0023379288613796234, + "learning_rate": 4.2625585042431347e-07, + "loss": 0.0, + "num_input_tokens_seen": 13527680, + "step": 27470 + }, + { + "epoch": 3.6261053187277286, + "grad_norm": 0.013858218677341938, + "learning_rate": 4.258785836267792e-07, + "loss": 0.0, + "num_input_tokens_seen": 13530112, + "step": 27475 + }, + { + "epoch": 3.626765210505477, + "grad_norm": 0.12098375707864761, + "learning_rate": 4.255014386823582e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13532416, + "step": 27480 + }, + { + "epoch": 3.6274251022832256, + "grad_norm": 0.0001731712545733899, + "learning_rate": 4.25124415671097e-07, + "loss": 0.0, + "num_input_tokens_seen": 13535040, + "step": 27485 + }, + { + "epoch": 3.628084994060974, + "grad_norm": 0.0029791847337037325, + "learning_rate": 4.24747514673015e-07, + "loss": 0.0008, + "num_input_tokens_seen": 13537408, + "step": 27490 + }, + { + "epoch": 3.6287448858387226, + "grad_norm": 12.79725456237793, + "learning_rate": 4.24370735768108e-07, + "loss": 0.0457, + "num_input_tokens_seen": 13539584, + "step": 27495 + }, + { + "epoch": 3.629404777616471, + "grad_norm": 0.0019413211848586798, + "learning_rate": 4.23994079036344e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13542144, + "step": 27500 + }, + { + "epoch": 3.630064669394219, + "grad_norm": 0.0006674039177596569, + "learning_rate": 4.2361754455766517e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13544576, + "step": 27505 + }, + { + "epoch": 3.630724561171968, + "grad_norm": 0.013020829297602177, + "learning_rate": 4.232411324119888e-07, + "loss": 0.0, + "num_input_tokens_seen": 13546880, + "step": 27510 + }, + { + "epoch": 3.631384452949716, + "grad_norm": 0.00012012778461212292, + "learning_rate": 4.228648426792054e-07, + "loss": 0.0736, + "num_input_tokens_seen": 13549440, + "step": 27515 + }, + { + "epoch": 3.632044344727465, + "grad_norm": 0.11743014305830002, + "learning_rate": 4.224886754391803e-07, + "loss": 0.0, + "num_input_tokens_seen": 13552000, + "step": 27520 + }, + { + "epoch": 3.632704236505213, + "grad_norm": 0.009049608372151852, + "learning_rate": 4.2211263077175144e-07, + "loss": 0.001, + "num_input_tokens_seen": 13554688, + "step": 27525 + }, + { + "epoch": 3.6333641282829614, + "grad_norm": 0.2897687554359436, + "learning_rate": 4.2173670875673197e-07, + "loss": 0.0611, + "num_input_tokens_seen": 13557568, + "step": 27530 + }, + { + "epoch": 3.63402402006071, + "grad_norm": 0.001158829894848168, + "learning_rate": 4.213609094739089e-07, + "loss": 0.0, + "num_input_tokens_seen": 13560128, + "step": 27535 + }, + { + "epoch": 3.6346839118384584, + "grad_norm": 0.004890472162514925, + "learning_rate": 4.2098523300304236e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13562560, + "step": 27540 + }, + { + "epoch": 3.635343803616207, + "grad_norm": 0.35512468218803406, + "learning_rate": 4.2060967942386715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13564928, + "step": 27545 + }, + { + "epoch": 3.6360036953939554, + "grad_norm": 0.0007364979828707874, + "learning_rate": 4.2023424881609195e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13567360, + "step": 27550 + }, + { + "epoch": 3.6366635871717037, + "grad_norm": 0.22686073184013367, + "learning_rate": 4.1985894125939947e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13569920, + "step": 27555 + }, + { + "epoch": 3.6373234789494524, + "grad_norm": 0.0018527201609686017, + "learning_rate": 4.194837568334452e-07, + "loss": 0.0065, + "num_input_tokens_seen": 13572288, + "step": 27560 + }, + { + "epoch": 3.6379833707272007, + "grad_norm": 0.0005900466348975897, + "learning_rate": 4.191086956178598e-07, + "loss": 0.0, + "num_input_tokens_seen": 13574720, + "step": 27565 + }, + { + "epoch": 3.6386432625049494, + "grad_norm": 0.0005124110612086952, + "learning_rate": 4.187337576922476e-07, + "loss": 0.0, + "num_input_tokens_seen": 13577152, + "step": 27570 + }, + { + "epoch": 3.6393031542826977, + "grad_norm": 0.0024374271742999554, + "learning_rate": 4.1835894313618593e-07, + "loss": 0.0028, + "num_input_tokens_seen": 13579584, + "step": 27575 + }, + { + "epoch": 3.639963046060446, + "grad_norm": 0.0035164635628461838, + "learning_rate": 4.179842520292265e-07, + "loss": 0.0, + "num_input_tokens_seen": 13582016, + "step": 27580 + }, + { + "epoch": 3.6406229378381947, + "grad_norm": 0.047881029546260834, + "learning_rate": 4.176096844508954e-07, + "loss": 0.0, + "num_input_tokens_seen": 13584192, + "step": 27585 + }, + { + "epoch": 3.641282829615943, + "grad_norm": 0.09858769178390503, + "learning_rate": 4.17235240480691e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13586624, + "step": 27590 + }, + { + "epoch": 3.6419427213936917, + "grad_norm": 0.001338004251010716, + "learning_rate": 4.1686092019808685e-07, + "loss": 0.0016, + "num_input_tokens_seen": 13588864, + "step": 27595 + }, + { + "epoch": 3.64260261317144, + "grad_norm": 0.00016618985682725906, + "learning_rate": 4.164867236825296e-07, + "loss": 0.0487, + "num_input_tokens_seen": 13591552, + "step": 27600 + }, + { + "epoch": 3.6432625049491882, + "grad_norm": 9.292057991027832, + "learning_rate": 4.1611265101344005e-07, + "loss": 0.028, + "num_input_tokens_seen": 13593920, + "step": 27605 + }, + { + "epoch": 3.6439223967269365, + "grad_norm": 0.009514679200947285, + "learning_rate": 4.1573870227021224e-07, + "loss": 0.066, + "num_input_tokens_seen": 13596288, + "step": 27610 + }, + { + "epoch": 3.6445822885046852, + "grad_norm": 0.0004131619061809033, + "learning_rate": 4.153648775322132e-07, + "loss": 0.0, + "num_input_tokens_seen": 13598464, + "step": 27615 + }, + { + "epoch": 3.6452421802824335, + "grad_norm": 0.0005515352240763605, + "learning_rate": 4.1499117687878606e-07, + "loss": 0.0014, + "num_input_tokens_seen": 13600704, + "step": 27620 + }, + { + "epoch": 3.6459020720601822, + "grad_norm": 0.003655769629403949, + "learning_rate": 4.1461760038924496e-07, + "loss": 0.0487, + "num_input_tokens_seen": 13603136, + "step": 27625 + }, + { + "epoch": 3.6465619638379305, + "grad_norm": 0.00046932691475376487, + "learning_rate": 4.142441481428792e-07, + "loss": 0.0, + "num_input_tokens_seen": 13605440, + "step": 27630 + }, + { + "epoch": 3.647221855615679, + "grad_norm": 0.015964679419994354, + "learning_rate": 4.138708202189516e-07, + "loss": 0.0, + "num_input_tokens_seen": 13607744, + "step": 27635 + }, + { + "epoch": 3.6478817473934275, + "grad_norm": 0.001884157769382, + "learning_rate": 4.134976166966977e-07, + "loss": 0.0, + "num_input_tokens_seen": 13610240, + "step": 27640 + }, + { + "epoch": 3.648541639171176, + "grad_norm": 0.0008349604904651642, + "learning_rate": 4.131245376553278e-07, + "loss": 0.0754, + "num_input_tokens_seen": 13612480, + "step": 27645 + }, + { + "epoch": 3.6492015309489245, + "grad_norm": 0.029129212722182274, + "learning_rate": 4.1275158317402436e-07, + "loss": 0.0028, + "num_input_tokens_seen": 13615104, + "step": 27650 + }, + { + "epoch": 3.649861422726673, + "grad_norm": 0.0011978574329987168, + "learning_rate": 4.123787533319455e-07, + "loss": 0.0, + "num_input_tokens_seen": 13617536, + "step": 27655 + }, + { + "epoch": 3.650521314504421, + "grad_norm": 0.0017305328510701656, + "learning_rate": 4.1200604820822103e-07, + "loss": 0.0018, + "num_input_tokens_seen": 13619904, + "step": 27660 + }, + { + "epoch": 3.65118120628217, + "grad_norm": 0.0013345396146178246, + "learning_rate": 4.1163346788195465e-07, + "loss": 0.0, + "num_input_tokens_seen": 13622464, + "step": 27665 + }, + { + "epoch": 3.651841098059918, + "grad_norm": 0.0014696142170578241, + "learning_rate": 4.11261012432224e-07, + "loss": 0.0018, + "num_input_tokens_seen": 13625152, + "step": 27670 + }, + { + "epoch": 3.652500989837667, + "grad_norm": 0.11104393750429153, + "learning_rate": 4.1088868193808023e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13627712, + "step": 27675 + }, + { + "epoch": 3.653160881615415, + "grad_norm": 0.0015458540292456746, + "learning_rate": 4.10516476478548e-07, + "loss": 0.0, + "num_input_tokens_seen": 13629952, + "step": 27680 + }, + { + "epoch": 3.6538207733931634, + "grad_norm": 0.0003642126393970102, + "learning_rate": 4.101443961326245e-07, + "loss": 0.0688, + "num_input_tokens_seen": 13632576, + "step": 27685 + }, + { + "epoch": 3.654480665170912, + "grad_norm": 0.011262251064181328, + "learning_rate": 4.0977244097928164e-07, + "loss": 0.0, + "num_input_tokens_seen": 13634944, + "step": 27690 + }, + { + "epoch": 3.6551405569486604, + "grad_norm": 0.0009316291543655097, + "learning_rate": 4.094006110974645e-07, + "loss": 0.0, + "num_input_tokens_seen": 13637248, + "step": 27695 + }, + { + "epoch": 3.655800448726409, + "grad_norm": 0.029980765655636787, + "learning_rate": 4.0902890656609044e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13639744, + "step": 27700 + }, + { + "epoch": 3.6564603405041574, + "grad_norm": 40.80172348022461, + "learning_rate": 4.0865732746405145e-07, + "loss": 0.1378, + "num_input_tokens_seen": 13642240, + "step": 27705 + }, + { + "epoch": 3.6571202322819056, + "grad_norm": 0.0032207188196480274, + "learning_rate": 4.08285873870213e-07, + "loss": 0.0, + "num_input_tokens_seen": 13644672, + "step": 27710 + }, + { + "epoch": 3.6577801240596544, + "grad_norm": 0.00017996614042203873, + "learning_rate": 4.079145458634125e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13647040, + "step": 27715 + }, + { + "epoch": 3.6584400158374026, + "grad_norm": 0.00022340656141750515, + "learning_rate": 4.075433435224621e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13649600, + "step": 27720 + }, + { + "epoch": 3.6590999076151514, + "grad_norm": 0.0005656805005855858, + "learning_rate": 4.071722669261468e-07, + "loss": 0.0072, + "num_input_tokens_seen": 13652352, + "step": 27725 + }, + { + "epoch": 3.6597597993928996, + "grad_norm": 0.00013878944446332753, + "learning_rate": 4.068013161532253e-07, + "loss": 0.0, + "num_input_tokens_seen": 13654976, + "step": 27730 + }, + { + "epoch": 3.660419691170648, + "grad_norm": 0.0004583010741043836, + "learning_rate": 4.064304912824286e-07, + "loss": 0.0007, + "num_input_tokens_seen": 13657408, + "step": 27735 + }, + { + "epoch": 3.661079582948396, + "grad_norm": 1.2669492959976196, + "learning_rate": 4.0605979239246166e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13659392, + "step": 27740 + }, + { + "epoch": 3.661739474726145, + "grad_norm": 9.382128337165341e-05, + "learning_rate": 4.056892195620032e-07, + "loss": 0.0, + "num_input_tokens_seen": 13661824, + "step": 27745 + }, + { + "epoch": 3.6623993665038936, + "grad_norm": 0.002449862891808152, + "learning_rate": 4.0531877286970397e-07, + "loss": 0.0, + "num_input_tokens_seen": 13664384, + "step": 27750 + }, + { + "epoch": 3.663059258281642, + "grad_norm": 0.20444366335868835, + "learning_rate": 4.0494845239418873e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13666688, + "step": 27755 + }, + { + "epoch": 3.66371915005939, + "grad_norm": 0.0016125873662531376, + "learning_rate": 4.045782582140559e-07, + "loss": 0.0844, + "num_input_tokens_seen": 13669056, + "step": 27760 + }, + { + "epoch": 3.6643790418371385, + "grad_norm": 0.005589081905782223, + "learning_rate": 4.042081904078757e-07, + "loss": 0.1125, + "num_input_tokens_seen": 13671680, + "step": 27765 + }, + { + "epoch": 3.665038933614887, + "grad_norm": 0.00195342511869967, + "learning_rate": 4.0383824905419263e-07, + "loss": 0.0, + "num_input_tokens_seen": 13674240, + "step": 27770 + }, + { + "epoch": 3.6656988253926355, + "grad_norm": 0.0008958014077506959, + "learning_rate": 4.034684342315241e-07, + "loss": 0.0549, + "num_input_tokens_seen": 13676672, + "step": 27775 + }, + { + "epoch": 3.666358717170384, + "grad_norm": 0.0006265094853006303, + "learning_rate": 4.0309874601836114e-07, + "loss": 0.0, + "num_input_tokens_seen": 13678976, + "step": 27780 + }, + { + "epoch": 3.6670186089481325, + "grad_norm": 0.0008544830488972366, + "learning_rate": 4.0272918449316684e-07, + "loss": 0.0626, + "num_input_tokens_seen": 13681344, + "step": 27785 + }, + { + "epoch": 3.6676785007258808, + "grad_norm": 0.0001300136063946411, + "learning_rate": 4.0235974973437735e-07, + "loss": 0.0, + "num_input_tokens_seen": 13683456, + "step": 27790 + }, + { + "epoch": 3.6683383925036295, + "grad_norm": 0.013282733038067818, + "learning_rate": 4.0199044182040385e-07, + "loss": 0.0, + "num_input_tokens_seen": 13686272, + "step": 27795 + }, + { + "epoch": 3.6689982842813778, + "grad_norm": 0.00022985691612120718, + "learning_rate": 4.016212608296284e-07, + "loss": 0.1113, + "num_input_tokens_seen": 13688896, + "step": 27800 + }, + { + "epoch": 3.6696581760591265, + "grad_norm": 0.07989729940891266, + "learning_rate": 4.012522068404075e-07, + "loss": 0.0472, + "num_input_tokens_seen": 13691200, + "step": 27805 + }, + { + "epoch": 3.6703180678368748, + "grad_norm": 0.0033196040894836187, + "learning_rate": 4.0088327993106964e-07, + "loss": 0.0, + "num_input_tokens_seen": 13693888, + "step": 27810 + }, + { + "epoch": 3.670977959614623, + "grad_norm": 0.000571762619074434, + "learning_rate": 4.005144801799171e-07, + "loss": 0.0308, + "num_input_tokens_seen": 13696256, + "step": 27815 + }, + { + "epoch": 3.6716378513923718, + "grad_norm": 0.00014293697313405573, + "learning_rate": 4.001458076652253e-07, + "loss": 0.1735, + "num_input_tokens_seen": 13698752, + "step": 27820 + }, + { + "epoch": 3.67229774317012, + "grad_norm": 0.00529489666223526, + "learning_rate": 3.9977726246524133e-07, + "loss": 0.0006, + "num_input_tokens_seen": 13701376, + "step": 27825 + }, + { + "epoch": 3.6729576349478688, + "grad_norm": 0.0012214038288220763, + "learning_rate": 3.994088446581877e-07, + "loss": 0.0, + "num_input_tokens_seen": 13703808, + "step": 27830 + }, + { + "epoch": 3.673617526725617, + "grad_norm": 0.013352553360164165, + "learning_rate": 3.990405543222576e-07, + "loss": 0.0, + "num_input_tokens_seen": 13706176, + "step": 27835 + }, + { + "epoch": 3.6742774185033653, + "grad_norm": 0.0007113065803423524, + "learning_rate": 3.9867239153561774e-07, + "loss": 0.0079, + "num_input_tokens_seen": 13708480, + "step": 27840 + }, + { + "epoch": 3.674937310281114, + "grad_norm": 0.0019296143436804414, + "learning_rate": 3.9830435637640825e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13710848, + "step": 27845 + }, + { + "epoch": 3.6755972020588623, + "grad_norm": 0.005527435336261988, + "learning_rate": 3.979364489227419e-07, + "loss": 0.0, + "num_input_tokens_seen": 13713024, + "step": 27850 + }, + { + "epoch": 3.676257093836611, + "grad_norm": 0.15193168818950653, + "learning_rate": 3.9756866925270494e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13715776, + "step": 27855 + }, + { + "epoch": 3.6769169856143593, + "grad_norm": 0.06932579725980759, + "learning_rate": 3.972010174443551e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13718336, + "step": 27860 + }, + { + "epoch": 3.6775768773921076, + "grad_norm": 0.008432332426309586, + "learning_rate": 3.9683349357572417e-07, + "loss": 0.0, + "num_input_tokens_seen": 13720896, + "step": 27865 + }, + { + "epoch": 3.678236769169856, + "grad_norm": 0.00018875522073358297, + "learning_rate": 3.9646609772481677e-07, + "loss": 0.0, + "num_input_tokens_seen": 13723136, + "step": 27870 + }, + { + "epoch": 3.6788966609476046, + "grad_norm": 0.023280220106244087, + "learning_rate": 3.960988299696094e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13725568, + "step": 27875 + }, + { + "epoch": 3.6795565527253533, + "grad_norm": 0.00022550317225977778, + "learning_rate": 3.957316903880522e-07, + "loss": 0.0, + "num_input_tokens_seen": 13727936, + "step": 27880 + }, + { + "epoch": 3.6802164445031016, + "grad_norm": 0.002597242360934615, + "learning_rate": 3.953646790580679e-07, + "loss": 0.0, + "num_input_tokens_seen": 13730240, + "step": 27885 + }, + { + "epoch": 3.68087633628085, + "grad_norm": 0.00300345616415143, + "learning_rate": 3.949977960575525e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13732928, + "step": 27890 + }, + { + "epoch": 3.681536228058598, + "grad_norm": 4.238515853881836, + "learning_rate": 3.946310414643734e-07, + "loss": 0.0109, + "num_input_tokens_seen": 13735616, + "step": 27895 + }, + { + "epoch": 3.682196119836347, + "grad_norm": 0.0008129694033414125, + "learning_rate": 3.94264415356372e-07, + "loss": 0.0487, + "num_input_tokens_seen": 13738048, + "step": 27900 + }, + { + "epoch": 3.682856011614095, + "grad_norm": 0.004607574548572302, + "learning_rate": 3.938979178113625e-07, + "loss": 0.121, + "num_input_tokens_seen": 13740544, + "step": 27905 + }, + { + "epoch": 3.683515903391844, + "grad_norm": 0.00020315272558946162, + "learning_rate": 3.9353154890713037e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13743168, + "step": 27910 + }, + { + "epoch": 3.684175795169592, + "grad_norm": 0.0006358879618346691, + "learning_rate": 3.9316530872143537e-07, + "loss": 0.0281, + "num_input_tokens_seen": 13745408, + "step": 27915 + }, + { + "epoch": 3.6848356869473404, + "grad_norm": 0.003894600784406066, + "learning_rate": 3.927991973320096e-07, + "loss": 0.0337, + "num_input_tokens_seen": 13747904, + "step": 27920 + }, + { + "epoch": 3.685495578725089, + "grad_norm": 0.00034760107519105077, + "learning_rate": 3.924332148165569e-07, + "loss": 0.0579, + "num_input_tokens_seen": 13750400, + "step": 27925 + }, + { + "epoch": 3.6861554705028374, + "grad_norm": 0.001135274418629706, + "learning_rate": 3.9206736125275463e-07, + "loss": 0.0, + "num_input_tokens_seen": 13753024, + "step": 27930 + }, + { + "epoch": 3.686815362280586, + "grad_norm": 9.904774196911603e-05, + "learning_rate": 3.9170163671825265e-07, + "loss": 0.0, + "num_input_tokens_seen": 13755520, + "step": 27935 + }, + { + "epoch": 3.6874752540583344, + "grad_norm": 1.1374342441558838, + "learning_rate": 3.9133604129067364e-07, + "loss": 0.001, + "num_input_tokens_seen": 13758336, + "step": 27940 + }, + { + "epoch": 3.6881351458360827, + "grad_norm": 0.00011956832167925313, + "learning_rate": 3.9097057504761234e-07, + "loss": 0.0, + "num_input_tokens_seen": 13760960, + "step": 27945 + }, + { + "epoch": 3.6887950376138314, + "grad_norm": 0.0032787492964416742, + "learning_rate": 3.9060523806663556e-07, + "loss": 0.0, + "num_input_tokens_seen": 13763520, + "step": 27950 + }, + { + "epoch": 3.6894549293915797, + "grad_norm": 0.00014116879901848733, + "learning_rate": 3.9024003042528474e-07, + "loss": 0.0, + "num_input_tokens_seen": 13766144, + "step": 27955 + }, + { + "epoch": 3.6901148211693284, + "grad_norm": 0.0004441550699993968, + "learning_rate": 3.898749522010716e-07, + "loss": 0.0, + "num_input_tokens_seen": 13768768, + "step": 27960 + }, + { + "epoch": 3.6907747129470767, + "grad_norm": 0.007838066667318344, + "learning_rate": 3.895100034714817e-07, + "loss": 0.0674, + "num_input_tokens_seen": 13771264, + "step": 27965 + }, + { + "epoch": 3.691434604724825, + "grad_norm": 0.006580795627087355, + "learning_rate": 3.8914518431397305e-07, + "loss": 0.0, + "num_input_tokens_seen": 13773440, + "step": 27970 + }, + { + "epoch": 3.6920944965025737, + "grad_norm": 0.045708343386650085, + "learning_rate": 3.887804948059752e-07, + "loss": 0.0007, + "num_input_tokens_seen": 13775872, + "step": 27975 + }, + { + "epoch": 3.692754388280322, + "grad_norm": 0.03007504530251026, + "learning_rate": 3.8841593502489155e-07, + "loss": 0.0, + "num_input_tokens_seen": 13778112, + "step": 27980 + }, + { + "epoch": 3.6934142800580707, + "grad_norm": 0.0015138540184125304, + "learning_rate": 3.880515050480964e-07, + "loss": 0.0, + "num_input_tokens_seen": 13780544, + "step": 27985 + }, + { + "epoch": 3.694074171835819, + "grad_norm": 1.4385871887207031, + "learning_rate": 3.876872049529385e-07, + "loss": 0.0012, + "num_input_tokens_seen": 13782976, + "step": 27990 + }, + { + "epoch": 3.6947340636135673, + "grad_norm": 0.0011436428176239133, + "learning_rate": 3.8732303481673733e-07, + "loss": 0.0, + "num_input_tokens_seen": 13785472, + "step": 27995 + }, + { + "epoch": 3.695393955391316, + "grad_norm": 56.57427978515625, + "learning_rate": 3.869589947167851e-07, + "loss": 0.0673, + "num_input_tokens_seen": 13788032, + "step": 28000 + }, + { + "epoch": 3.6960538471690643, + "grad_norm": 0.00022567510313820094, + "learning_rate": 3.8659508473034684e-07, + "loss": 0.0, + "num_input_tokens_seen": 13790528, + "step": 28005 + }, + { + "epoch": 3.696713738946813, + "grad_norm": 0.003226806875318289, + "learning_rate": 3.8623130493465994e-07, + "loss": 0.0029, + "num_input_tokens_seen": 13793216, + "step": 28010 + }, + { + "epoch": 3.6973736307245613, + "grad_norm": 0.0005611493834294379, + "learning_rate": 3.8586765540693434e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13795584, + "step": 28015 + }, + { + "epoch": 3.6980335225023095, + "grad_norm": 0.00033650652039796114, + "learning_rate": 3.855041362243514e-07, + "loss": 0.0, + "num_input_tokens_seen": 13797952, + "step": 28020 + }, + { + "epoch": 3.698693414280058, + "grad_norm": 0.018644453957676888, + "learning_rate": 3.8514074746406566e-07, + "loss": 0.0, + "num_input_tokens_seen": 13800576, + "step": 28025 + }, + { + "epoch": 3.6993533060578065, + "grad_norm": 0.031189944595098495, + "learning_rate": 3.847774892032042e-07, + "loss": 0.0, + "num_input_tokens_seen": 13803136, + "step": 28030 + }, + { + "epoch": 3.700013197835555, + "grad_norm": 0.45781606435775757, + "learning_rate": 3.844143615188652e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13805248, + "step": 28035 + }, + { + "epoch": 3.7006730896133035, + "grad_norm": 0.042275186628103256, + "learning_rate": 3.8405136448812023e-07, + "loss": 0.0, + "num_input_tokens_seen": 13807424, + "step": 28040 + }, + { + "epoch": 3.701332981391052, + "grad_norm": 0.0003274598275311291, + "learning_rate": 3.8368849818801317e-07, + "loss": 0.058, + "num_input_tokens_seen": 13810304, + "step": 28045 + }, + { + "epoch": 3.7019928731688, + "grad_norm": 0.005129341036081314, + "learning_rate": 3.8332576269555906e-07, + "loss": 0.0, + "num_input_tokens_seen": 13812544, + "step": 28050 + }, + { + "epoch": 3.702652764946549, + "grad_norm": 0.017442552372813225, + "learning_rate": 3.8296315808774616e-07, + "loss": 0.0, + "num_input_tokens_seen": 13815040, + "step": 28055 + }, + { + "epoch": 3.703312656724297, + "grad_norm": 0.00010095502511831, + "learning_rate": 3.826006844415347e-07, + "loss": 0.0, + "num_input_tokens_seen": 13817536, + "step": 28060 + }, + { + "epoch": 3.703972548502046, + "grad_norm": 0.00021431800269056112, + "learning_rate": 3.822383418338576e-07, + "loss": 0.0, + "num_input_tokens_seen": 13819840, + "step": 28065 + }, + { + "epoch": 3.704632440279794, + "grad_norm": 0.0005621056770905852, + "learning_rate": 3.8187613034161847e-07, + "loss": 0.0, + "num_input_tokens_seen": 13822208, + "step": 28070 + }, + { + "epoch": 3.7052923320575424, + "grad_norm": 0.0012197456089779735, + "learning_rate": 3.815140500416947e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13824512, + "step": 28075 + }, + { + "epoch": 3.705952223835291, + "grad_norm": 0.00770995020866394, + "learning_rate": 3.811521010109353e-07, + "loss": 0.0008, + "num_input_tokens_seen": 13826816, + "step": 28080 + }, + { + "epoch": 3.7066121156130394, + "grad_norm": 0.00017412604938726872, + "learning_rate": 3.807902833261609e-07, + "loss": 0.0, + "num_input_tokens_seen": 13829312, + "step": 28085 + }, + { + "epoch": 3.707272007390788, + "grad_norm": 0.0002185389748774469, + "learning_rate": 3.804285970641649e-07, + "loss": 0.0518, + "num_input_tokens_seen": 13831680, + "step": 28090 + }, + { + "epoch": 3.7079318991685364, + "grad_norm": 0.0013204488204792142, + "learning_rate": 3.800670423017128e-07, + "loss": 0.0533, + "num_input_tokens_seen": 13834240, + "step": 28095 + }, + { + "epoch": 3.7085917909462847, + "grad_norm": 11.457676887512207, + "learning_rate": 3.7970561911554143e-07, + "loss": 0.0718, + "num_input_tokens_seen": 13836352, + "step": 28100 + }, + { + "epoch": 3.7092516827240334, + "grad_norm": 0.0008938403916545212, + "learning_rate": 3.793443275823607e-07, + "loss": 0.0, + "num_input_tokens_seen": 13838976, + "step": 28105 + }, + { + "epoch": 3.7099115745017817, + "grad_norm": 0.002852260135114193, + "learning_rate": 3.7898316777885195e-07, + "loss": 0.0011, + "num_input_tokens_seen": 13841472, + "step": 28110 + }, + { + "epoch": 3.7105714662795304, + "grad_norm": 0.008115318603813648, + "learning_rate": 3.786221397816691e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13843712, + "step": 28115 + }, + { + "epoch": 3.7112313580572787, + "grad_norm": 0.0013575529446825385, + "learning_rate": 3.782612436674375e-07, + "loss": 0.0487, + "num_input_tokens_seen": 13846208, + "step": 28120 + }, + { + "epoch": 3.711891249835027, + "grad_norm": 0.0008263670606538653, + "learning_rate": 3.7790047951275394e-07, + "loss": 0.0401, + "num_input_tokens_seen": 13848448, + "step": 28125 + }, + { + "epoch": 3.7125511416127757, + "grad_norm": 0.07842446118593216, + "learning_rate": 3.7753984739418945e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13850880, + "step": 28130 + }, + { + "epoch": 3.713211033390524, + "grad_norm": 0.007149757817387581, + "learning_rate": 3.771793473882844e-07, + "loss": 0.0025, + "num_input_tokens_seen": 13853440, + "step": 28135 + }, + { + "epoch": 3.7138709251682727, + "grad_norm": 0.9949508309364319, + "learning_rate": 3.768189795715532e-07, + "loss": 0.0912, + "num_input_tokens_seen": 13856000, + "step": 28140 + }, + { + "epoch": 3.714530816946021, + "grad_norm": 0.016497809439897537, + "learning_rate": 3.764587440204804e-07, + "loss": 0.0, + "num_input_tokens_seen": 13858368, + "step": 28145 + }, + { + "epoch": 3.715190708723769, + "grad_norm": 0.008333483710885048, + "learning_rate": 3.7609864081152387e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13861056, + "step": 28150 + }, + { + "epoch": 3.7158506005015175, + "grad_norm": 0.0004311394295655191, + "learning_rate": 3.7573867002111324e-07, + "loss": 0.0, + "num_input_tokens_seen": 13863552, + "step": 28155 + }, + { + "epoch": 3.716510492279266, + "grad_norm": 0.0013469455298036337, + "learning_rate": 3.753788317256488e-07, + "loss": 0.0, + "num_input_tokens_seen": 13866240, + "step": 28160 + }, + { + "epoch": 3.7171703840570145, + "grad_norm": 0.0002838643849827349, + "learning_rate": 3.7501912600150474e-07, + "loss": 0.0, + "num_input_tokens_seen": 13868480, + "step": 28165 + }, + { + "epoch": 3.717830275834763, + "grad_norm": 0.0029573384672403336, + "learning_rate": 3.7465955292502505e-07, + "loss": 0.0, + "num_input_tokens_seen": 13870592, + "step": 28170 + }, + { + "epoch": 3.7184901676125115, + "grad_norm": 0.00849292054772377, + "learning_rate": 3.7430011257252735e-07, + "loss": 0.0308, + "num_input_tokens_seen": 13872704, + "step": 28175 + }, + { + "epoch": 3.7191500593902598, + "grad_norm": 0.7944841384887695, + "learning_rate": 3.7394080502029934e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13874880, + "step": 28180 + }, + { + "epoch": 3.7198099511680085, + "grad_norm": 14.134904861450195, + "learning_rate": 3.73581630344602e-07, + "loss": 0.0302, + "num_input_tokens_seen": 13877248, + "step": 28185 + }, + { + "epoch": 3.7204698429457568, + "grad_norm": 0.06969019025564194, + "learning_rate": 3.732225886216678e-07, + "loss": 0.0, + "num_input_tokens_seen": 13879744, + "step": 28190 + }, + { + "epoch": 3.7211297347235055, + "grad_norm": 0.06846865266561508, + "learning_rate": 3.7286367992769994e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13882112, + "step": 28195 + }, + { + "epoch": 3.721789626501254, + "grad_norm": 0.0003618435002863407, + "learning_rate": 3.7250490433887473e-07, + "loss": 0.0471, + "num_input_tokens_seen": 13884416, + "step": 28200 + }, + { + "epoch": 3.722449518279002, + "grad_norm": 24.19239616394043, + "learning_rate": 3.7214626193133993e-07, + "loss": 0.0548, + "num_input_tokens_seen": 13886656, + "step": 28205 + }, + { + "epoch": 3.723109410056751, + "grad_norm": 0.0008490770705975592, + "learning_rate": 3.717877527812141e-07, + "loss": 0.1484, + "num_input_tokens_seen": 13889088, + "step": 28210 + }, + { + "epoch": 3.723769301834499, + "grad_norm": 0.011017916724085808, + "learning_rate": 3.714293769645886e-07, + "loss": 0.0813, + "num_input_tokens_seen": 13891456, + "step": 28215 + }, + { + "epoch": 3.724429193612248, + "grad_norm": 0.0930677130818367, + "learning_rate": 3.710711345575261e-07, + "loss": 0.0005, + "num_input_tokens_seen": 13894016, + "step": 28220 + }, + { + "epoch": 3.725089085389996, + "grad_norm": 0.019668880850076675, + "learning_rate": 3.707130256360614e-07, + "loss": 0.0, + "num_input_tokens_seen": 13896512, + "step": 28225 + }, + { + "epoch": 3.7257489771677443, + "grad_norm": 0.008796813897788525, + "learning_rate": 3.7035505027619964e-07, + "loss": 0.0181, + "num_input_tokens_seen": 13899008, + "step": 28230 + }, + { + "epoch": 3.726408868945493, + "grad_norm": 53.365760803222656, + "learning_rate": 3.6999720855391893e-07, + "loss": 0.0411, + "num_input_tokens_seen": 13901632, + "step": 28235 + }, + { + "epoch": 3.7270687607232413, + "grad_norm": 27.761310577392578, + "learning_rate": 3.696395005451689e-07, + "loss": 0.0704, + "num_input_tokens_seen": 13903936, + "step": 28240 + }, + { + "epoch": 3.72772865250099, + "grad_norm": 0.003962219692766666, + "learning_rate": 3.6928192632586986e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13906368, + "step": 28245 + }, + { + "epoch": 3.7283885442787383, + "grad_norm": 37.43833541870117, + "learning_rate": 3.6892448597191463e-07, + "loss": 0.0881, + "num_input_tokens_seen": 13908992, + "step": 28250 + }, + { + "epoch": 3.7290484360564866, + "grad_norm": 11.68809700012207, + "learning_rate": 3.685671795591677e-07, + "loss": 0.0367, + "num_input_tokens_seen": 13911744, + "step": 28255 + }, + { + "epoch": 3.7297083278342353, + "grad_norm": 0.7247753739356995, + "learning_rate": 3.682100071634642e-07, + "loss": 0.0018, + "num_input_tokens_seen": 13914240, + "step": 28260 + }, + { + "epoch": 3.7303682196119836, + "grad_norm": 0.0006752077606506646, + "learning_rate": 3.6785296886061144e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13917120, + "step": 28265 + }, + { + "epoch": 3.7310281113897323, + "grad_norm": 0.010004118084907532, + "learning_rate": 3.674960647263885e-07, + "loss": 0.0, + "num_input_tokens_seen": 13919616, + "step": 28270 + }, + { + "epoch": 3.7316880031674806, + "grad_norm": 0.00652843713760376, + "learning_rate": 3.671392948365458e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13922560, + "step": 28275 + }, + { + "epoch": 3.732347894945229, + "grad_norm": 0.09454436600208282, + "learning_rate": 3.667826592668052e-07, + "loss": 0.0002, + "num_input_tokens_seen": 13925376, + "step": 28280 + }, + { + "epoch": 3.733007786722977, + "grad_norm": 0.10800040513277054, + "learning_rate": 3.664261580928589e-07, + "loss": 0.0006, + "num_input_tokens_seen": 13927936, + "step": 28285 + }, + { + "epoch": 3.733667678500726, + "grad_norm": 0.0006057324353605509, + "learning_rate": 3.660697913903733e-07, + "loss": 0.0044, + "num_input_tokens_seen": 13930176, + "step": 28290 + }, + { + "epoch": 3.734327570278474, + "grad_norm": 0.025416888296604156, + "learning_rate": 3.6571355923498346e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13932800, + "step": 28295 + }, + { + "epoch": 3.734987462056223, + "grad_norm": 0.0017488945741206408, + "learning_rate": 3.6535746170229777e-07, + "loss": 0.0049, + "num_input_tokens_seen": 13935424, + "step": 28300 + }, + { + "epoch": 3.735647353833971, + "grad_norm": 0.0009809860493987799, + "learning_rate": 3.6500149886789524e-07, + "loss": 0.0039, + "num_input_tokens_seen": 13938176, + "step": 28305 + }, + { + "epoch": 3.7363072456117195, + "grad_norm": 0.012923874892294407, + "learning_rate": 3.64645670807326e-07, + "loss": 0.0075, + "num_input_tokens_seen": 13940672, + "step": 28310 + }, + { + "epoch": 3.736967137389468, + "grad_norm": 0.005602862685918808, + "learning_rate": 3.642899775961127e-07, + "loss": 0.0013, + "num_input_tokens_seen": 13943232, + "step": 28315 + }, + { + "epoch": 3.7376270291672165, + "grad_norm": 0.004117357078939676, + "learning_rate": 3.6393441930974734e-07, + "loss": 0.0224, + "num_input_tokens_seen": 13945472, + "step": 28320 + }, + { + "epoch": 3.738286920944965, + "grad_norm": 0.0016863815253600478, + "learning_rate": 3.6357899602369626e-07, + "loss": 0.1003, + "num_input_tokens_seen": 13948288, + "step": 28325 + }, + { + "epoch": 3.7389468127227135, + "grad_norm": 0.009383490309119225, + "learning_rate": 3.632237078133946e-07, + "loss": 0.0, + "num_input_tokens_seen": 13950464, + "step": 28330 + }, + { + "epoch": 3.7396067045004617, + "grad_norm": 0.06896061450242996, + "learning_rate": 3.628685547542496e-07, + "loss": 0.0087, + "num_input_tokens_seen": 13952640, + "step": 28335 + }, + { + "epoch": 3.7402665962782105, + "grad_norm": 0.0010050699347630143, + "learning_rate": 3.6251353692164e-07, + "loss": 0.0, + "num_input_tokens_seen": 13954944, + "step": 28340 + }, + { + "epoch": 3.7409264880559587, + "grad_norm": 0.024658070877194405, + "learning_rate": 3.6215865439091587e-07, + "loss": 0.0097, + "num_input_tokens_seen": 13957184, + "step": 28345 + }, + { + "epoch": 3.7415863798337075, + "grad_norm": 0.00028802931774407625, + "learning_rate": 3.6180390723739883e-07, + "loss": 0.0, + "num_input_tokens_seen": 13959552, + "step": 28350 + }, + { + "epoch": 3.7422462716114557, + "grad_norm": 0.00033592613181099296, + "learning_rate": 3.614492955363806e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13962240, + "step": 28355 + }, + { + "epoch": 3.742906163389204, + "grad_norm": 0.0001732255332171917, + "learning_rate": 3.610948193631255e-07, + "loss": 0.0844, + "num_input_tokens_seen": 13964544, + "step": 28360 + }, + { + "epoch": 3.7435660551669527, + "grad_norm": 0.0003821653372142464, + "learning_rate": 3.607404787928686e-07, + "loss": 0.0, + "num_input_tokens_seen": 13967040, + "step": 28365 + }, + { + "epoch": 3.744225946944701, + "grad_norm": 0.0010076966136693954, + "learning_rate": 3.6038627390081567e-07, + "loss": 0.0, + "num_input_tokens_seen": 13969728, + "step": 28370 + }, + { + "epoch": 3.7448858387224497, + "grad_norm": 0.011342594400048256, + "learning_rate": 3.6003220476214445e-07, + "loss": 0.0, + "num_input_tokens_seen": 13972416, + "step": 28375 + }, + { + "epoch": 3.745545730500198, + "grad_norm": 0.00022565149993170053, + "learning_rate": 3.596782714520037e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13975040, + "step": 28380 + }, + { + "epoch": 3.7462056222779463, + "grad_norm": 0.0006821189308539033, + "learning_rate": 3.593244740455127e-07, + "loss": 0.0004, + "num_input_tokens_seen": 13977472, + "step": 28385 + }, + { + "epoch": 3.746865514055695, + "grad_norm": 0.00014877947978675365, + "learning_rate": 3.5897081261776275e-07, + "loss": 0.0176, + "num_input_tokens_seen": 13979776, + "step": 28390 + }, + { + "epoch": 3.7475254058334433, + "grad_norm": 0.0004527225682977587, + "learning_rate": 3.586172872438158e-07, + "loss": 0.0001, + "num_input_tokens_seen": 13982336, + "step": 28395 + }, + { + "epoch": 3.748185297611192, + "grad_norm": 0.000232151331147179, + "learning_rate": 3.582638979987054e-07, + "loss": 0.0352, + "num_input_tokens_seen": 13984768, + "step": 28400 + }, + { + "epoch": 3.7488451893889403, + "grad_norm": 0.0008931290940381587, + "learning_rate": 3.579106449574353e-07, + "loss": 0.0521, + "num_input_tokens_seen": 13986880, + "step": 28405 + }, + { + "epoch": 3.7495050811666886, + "grad_norm": 0.0014002250973135233, + "learning_rate": 3.5755752819498107e-07, + "loss": 0.0, + "num_input_tokens_seen": 13989696, + "step": 28410 + }, + { + "epoch": 3.750164972944437, + "grad_norm": 0.0012177800526842475, + "learning_rate": 3.572045477862896e-07, + "loss": 0.0109, + "num_input_tokens_seen": 13991936, + "step": 28415 + }, + { + "epoch": 3.7508248647221856, + "grad_norm": 0.00019355359836481512, + "learning_rate": 3.568517038062778e-07, + "loss": 0.0003, + "num_input_tokens_seen": 13994496, + "step": 28420 + }, + { + "epoch": 3.751484756499934, + "grad_norm": 0.002805389231070876, + "learning_rate": 3.564989963298346e-07, + "loss": 0.0243, + "num_input_tokens_seen": 13996672, + "step": 28425 + }, + { + "epoch": 3.751484756499934, + "eval_loss": 0.20364880561828613, + "eval_runtime": 7.9146, + "eval_samples_per_second": 850.964, + "eval_steps_per_second": 106.386, + "num_input_tokens_seen": 13996672, + "step": 28425 + }, + { + "epoch": 3.7521446482776826, + "grad_norm": 0.0034394606482237577, + "learning_rate": 3.5614642543181996e-07, + "loss": 0.1141, + "num_input_tokens_seen": 13998976, + "step": 28430 + }, + { + "epoch": 3.752804540055431, + "grad_norm": 0.0035546584986150265, + "learning_rate": 3.5579399118706364e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14001152, + "step": 28435 + }, + { + "epoch": 3.753464431833179, + "grad_norm": 0.007250829134136438, + "learning_rate": 3.5544169367036783e-07, + "loss": 0.0294, + "num_input_tokens_seen": 14003520, + "step": 28440 + }, + { + "epoch": 3.754124323610928, + "grad_norm": 0.001128159579820931, + "learning_rate": 3.550895329565049e-07, + "loss": 0.0, + "num_input_tokens_seen": 14005824, + "step": 28445 + }, + { + "epoch": 3.754784215388676, + "grad_norm": 0.22034797072410583, + "learning_rate": 3.5473750912021894e-07, + "loss": 0.0, + "num_input_tokens_seen": 14008128, + "step": 28450 + }, + { + "epoch": 3.755444107166425, + "grad_norm": 0.00023955103824846447, + "learning_rate": 3.543856222362239e-07, + "loss": 0.0096, + "num_input_tokens_seen": 14010560, + "step": 28455 + }, + { + "epoch": 3.756103998944173, + "grad_norm": 0.0013909810222685337, + "learning_rate": 3.540338723792049e-07, + "loss": 0.0, + "num_input_tokens_seen": 14013184, + "step": 28460 + }, + { + "epoch": 3.7567638907219214, + "grad_norm": 0.002331098075956106, + "learning_rate": 3.5368225962381924e-07, + "loss": 0.0, + "num_input_tokens_seen": 14015552, + "step": 28465 + }, + { + "epoch": 3.75742378249967, + "grad_norm": 0.00235453387722373, + "learning_rate": 3.533307840446935e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14018112, + "step": 28470 + }, + { + "epoch": 3.7580836742774184, + "grad_norm": 0.012911655008792877, + "learning_rate": 3.529794457164265e-07, + "loss": 0.0, + "num_input_tokens_seen": 14020736, + "step": 28475 + }, + { + "epoch": 3.758743566055167, + "grad_norm": 13.316226959228516, + "learning_rate": 3.526282447135862e-07, + "loss": 0.0088, + "num_input_tokens_seen": 14023104, + "step": 28480 + }, + { + "epoch": 3.7594034578329154, + "grad_norm": 0.0003600022755563259, + "learning_rate": 3.5227718111071316e-07, + "loss": 0.0457, + "num_input_tokens_seen": 14025664, + "step": 28485 + }, + { + "epoch": 3.7600633496106637, + "grad_norm": 0.15423204004764557, + "learning_rate": 3.519262549823183e-07, + "loss": 0.0175, + "num_input_tokens_seen": 14027776, + "step": 28490 + }, + { + "epoch": 3.7607232413884124, + "grad_norm": 0.0038064823020249605, + "learning_rate": 3.5157546640288227e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14030144, + "step": 28495 + }, + { + "epoch": 3.7613831331661607, + "grad_norm": 0.003817281685769558, + "learning_rate": 3.5122481544685857e-07, + "loss": 0.0067, + "num_input_tokens_seen": 14032576, + "step": 28500 + }, + { + "epoch": 3.7620430249439094, + "grad_norm": 21.253145217895508, + "learning_rate": 3.5087430218866945e-07, + "loss": 0.0166, + "num_input_tokens_seen": 14034944, + "step": 28505 + }, + { + "epoch": 3.7627029167216577, + "grad_norm": 0.00010077494516735896, + "learning_rate": 3.505239267027094e-07, + "loss": 0.0, + "num_input_tokens_seen": 14037312, + "step": 28510 + }, + { + "epoch": 3.763362808499406, + "grad_norm": 9.552456855773926, + "learning_rate": 3.5017368906334235e-07, + "loss": 0.0208, + "num_input_tokens_seen": 14039872, + "step": 28515 + }, + { + "epoch": 3.7640227002771547, + "grad_norm": 30.936616897583008, + "learning_rate": 3.498235893449042e-07, + "loss": 0.0324, + "num_input_tokens_seen": 14042240, + "step": 28520 + }, + { + "epoch": 3.764682592054903, + "grad_norm": 0.043132536113262177, + "learning_rate": 3.494736276217013e-07, + "loss": 0.0005, + "num_input_tokens_seen": 14044672, + "step": 28525 + }, + { + "epoch": 3.7653424838326517, + "grad_norm": 0.002851359313353896, + "learning_rate": 3.4912380396800987e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14047040, + "step": 28530 + }, + { + "epoch": 3.7660023756104, + "grad_norm": 0.0006523252232000232, + "learning_rate": 3.4877411845807783e-07, + "loss": 0.0735, + "num_input_tokens_seen": 14049856, + "step": 28535 + }, + { + "epoch": 3.7666622673881482, + "grad_norm": 0.002385950880125165, + "learning_rate": 3.4842457116612365e-07, + "loss": 0.0352, + "num_input_tokens_seen": 14052288, + "step": 28540 + }, + { + "epoch": 3.7673221591658965, + "grad_norm": 0.0008141055586747825, + "learning_rate": 3.4807516216633557e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14054528, + "step": 28545 + }, + { + "epoch": 3.7679820509436452, + "grad_norm": 0.014787948690354824, + "learning_rate": 3.477258915328735e-07, + "loss": 0.0, + "num_input_tokens_seen": 14056896, + "step": 28550 + }, + { + "epoch": 3.768641942721394, + "grad_norm": 0.001489198417402804, + "learning_rate": 3.4737675933986744e-07, + "loss": 0.0, + "num_input_tokens_seen": 14059392, + "step": 28555 + }, + { + "epoch": 3.7693018344991422, + "grad_norm": 0.00500260666012764, + "learning_rate": 3.4702776566141864e-07, + "loss": 0.0, + "num_input_tokens_seen": 14061696, + "step": 28560 + }, + { + "epoch": 3.7699617262768905, + "grad_norm": 0.6943917870521545, + "learning_rate": 3.4667891057159784e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14063744, + "step": 28565 + }, + { + "epoch": 3.770621618054639, + "grad_norm": 0.0005322875804267824, + "learning_rate": 3.463301941444473e-07, + "loss": 0.0906, + "num_input_tokens_seen": 14066240, + "step": 28570 + }, + { + "epoch": 3.7712815098323875, + "grad_norm": 0.0003532171540427953, + "learning_rate": 3.459816164539798e-07, + "loss": 0.0, + "num_input_tokens_seen": 14068736, + "step": 28575 + }, + { + "epoch": 3.771941401610136, + "grad_norm": 0.07054813206195831, + "learning_rate": 3.456331775741779e-07, + "loss": 0.1, + "num_input_tokens_seen": 14071232, + "step": 28580 + }, + { + "epoch": 3.7726012933878845, + "grad_norm": 0.001612989348359406, + "learning_rate": 3.452848775789955e-07, + "loss": 0.0, + "num_input_tokens_seen": 14073664, + "step": 28585 + }, + { + "epoch": 3.773261185165633, + "grad_norm": 1.5302505493164062, + "learning_rate": 3.449367165423571e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14075904, + "step": 28590 + }, + { + "epoch": 3.773921076943381, + "grad_norm": 0.011845918372273445, + "learning_rate": 3.4458869453815674e-07, + "loss": 0.002, + "num_input_tokens_seen": 14078208, + "step": 28595 + }, + { + "epoch": 3.77458096872113, + "grad_norm": 0.006674485746771097, + "learning_rate": 3.4424081164025976e-07, + "loss": 0.0446, + "num_input_tokens_seen": 14080704, + "step": 28600 + }, + { + "epoch": 3.775240860498878, + "grad_norm": 0.012882623821496964, + "learning_rate": 3.4389306792250194e-07, + "loss": 0.0, + "num_input_tokens_seen": 14083072, + "step": 28605 + }, + { + "epoch": 3.775900752276627, + "grad_norm": 0.004808145109564066, + "learning_rate": 3.435454634586896e-07, + "loss": 0.0, + "num_input_tokens_seen": 14085248, + "step": 28610 + }, + { + "epoch": 3.776560644054375, + "grad_norm": 0.0003251858288422227, + "learning_rate": 3.431979983225987e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14087936, + "step": 28615 + }, + { + "epoch": 3.7772205358321234, + "grad_norm": 22.550294876098633, + "learning_rate": 3.4285067258797626e-07, + "loss": 0.0266, + "num_input_tokens_seen": 14090368, + "step": 28620 + }, + { + "epoch": 3.777880427609872, + "grad_norm": 0.0005045776488259435, + "learning_rate": 3.425034863285404e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14093568, + "step": 28625 + }, + { + "epoch": 3.7785403193876204, + "grad_norm": 0.0031521327327936888, + "learning_rate": 3.42156439617978e-07, + "loss": 0.0341, + "num_input_tokens_seen": 14095808, + "step": 28630 + }, + { + "epoch": 3.779200211165369, + "grad_norm": 0.0008202116587199271, + "learning_rate": 3.418095325299475e-07, + "loss": 0.0, + "num_input_tokens_seen": 14098368, + "step": 28635 + }, + { + "epoch": 3.7798601029431174, + "grad_norm": 0.07252146303653717, + "learning_rate": 3.414627651380778e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14100736, + "step": 28640 + }, + { + "epoch": 3.7805199947208656, + "grad_norm": 7.641245611011982e-05, + "learning_rate": 3.4111613751596725e-07, + "loss": 0.0, + "num_input_tokens_seen": 14103104, + "step": 28645 + }, + { + "epoch": 3.7811798864986144, + "grad_norm": 0.0003453242243267596, + "learning_rate": 3.407696497371855e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14105600, + "step": 28650 + }, + { + "epoch": 3.7818397782763626, + "grad_norm": 0.0007029441185295582, + "learning_rate": 3.40423301875271e-07, + "loss": 0.0, + "num_input_tokens_seen": 14107712, + "step": 28655 + }, + { + "epoch": 3.7824996700541114, + "grad_norm": 0.0001722180750221014, + "learning_rate": 3.400770940037353e-07, + "loss": 0.0008, + "num_input_tokens_seen": 14110080, + "step": 28660 + }, + { + "epoch": 3.7831595618318596, + "grad_norm": 0.0002277433522976935, + "learning_rate": 3.3973102619605753e-07, + "loss": 0.0054, + "num_input_tokens_seen": 14112512, + "step": 28665 + }, + { + "epoch": 3.783819453609608, + "grad_norm": 0.0007299144635908306, + "learning_rate": 3.3938509852568773e-07, + "loss": 0.0, + "num_input_tokens_seen": 14114624, + "step": 28670 + }, + { + "epoch": 3.784479345387356, + "grad_norm": 0.07267485558986664, + "learning_rate": 3.390393110660471e-07, + "loss": 0.0, + "num_input_tokens_seen": 14116928, + "step": 28675 + }, + { + "epoch": 3.785139237165105, + "grad_norm": 0.0013063414953649044, + "learning_rate": 3.386936638905263e-07, + "loss": 0.0, + "num_input_tokens_seen": 14119296, + "step": 28680 + }, + { + "epoch": 3.7857991289428536, + "grad_norm": 0.008357301354408264, + "learning_rate": 3.38348157072487e-07, + "loss": 0.0, + "num_input_tokens_seen": 14121600, + "step": 28685 + }, + { + "epoch": 3.786459020720602, + "grad_norm": 0.0002192695828853175, + "learning_rate": 3.380027906852596e-07, + "loss": 0.0426, + "num_input_tokens_seen": 14123840, + "step": 28690 + }, + { + "epoch": 3.78711891249835, + "grad_norm": 0.00023211569350678474, + "learning_rate": 3.3765756480214616e-07, + "loss": 0.0, + "num_input_tokens_seen": 14126208, + "step": 28695 + }, + { + "epoch": 3.7877788042760985, + "grad_norm": 0.008235753513872623, + "learning_rate": 3.373124794964185e-07, + "loss": 0.0, + "num_input_tokens_seen": 14128640, + "step": 28700 + }, + { + "epoch": 3.788438696053847, + "grad_norm": 0.00028870353708043694, + "learning_rate": 3.36967534841318e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14130944, + "step": 28705 + }, + { + "epoch": 3.7890985878315955, + "grad_norm": 0.019349442794919014, + "learning_rate": 3.3662273091005687e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14133504, + "step": 28710 + }, + { + "epoch": 3.789758479609344, + "grad_norm": 0.0013696362730115652, + "learning_rate": 3.3627806777581777e-07, + "loss": 0.0011, + "num_input_tokens_seen": 14136128, + "step": 28715 + }, + { + "epoch": 3.7904183713870925, + "grad_norm": 0.043605487793684006, + "learning_rate": 3.35933545511752e-07, + "loss": 0.0919, + "num_input_tokens_seen": 14138432, + "step": 28720 + }, + { + "epoch": 3.7910782631648408, + "grad_norm": 0.00044478950439952314, + "learning_rate": 3.3558916419098247e-07, + "loss": 0.0203, + "num_input_tokens_seen": 14140928, + "step": 28725 + }, + { + "epoch": 3.7917381549425895, + "grad_norm": 0.00045032083289697766, + "learning_rate": 3.3524492388660166e-07, + "loss": 0.0014, + "num_input_tokens_seen": 14143296, + "step": 28730 + }, + { + "epoch": 3.7923980467203378, + "grad_norm": 0.12213001400232315, + "learning_rate": 3.349008246716721e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14145920, + "step": 28735 + }, + { + "epoch": 3.7930579384980865, + "grad_norm": 0.015413171611726284, + "learning_rate": 3.345568666192261e-07, + "loss": 0.0854, + "num_input_tokens_seen": 14148480, + "step": 28740 + }, + { + "epoch": 3.7937178302758348, + "grad_norm": 0.009154030121862888, + "learning_rate": 3.3421304980226627e-07, + "loss": 0.0, + "num_input_tokens_seen": 14150976, + "step": 28745 + }, + { + "epoch": 3.794377722053583, + "grad_norm": 0.0014472492039203644, + "learning_rate": 3.338693742937657e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14153728, + "step": 28750 + }, + { + "epoch": 3.7950376138313318, + "grad_norm": 0.8823553323745728, + "learning_rate": 3.3352584016666654e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14156288, + "step": 28755 + }, + { + "epoch": 3.79569750560908, + "grad_norm": 0.38144099712371826, + "learning_rate": 3.3318244749388136e-07, + "loss": 0.0006, + "num_input_tokens_seen": 14158976, + "step": 28760 + }, + { + "epoch": 3.7963573973868288, + "grad_norm": 0.0008522819844074547, + "learning_rate": 3.328391963482934e-07, + "loss": 0.0048, + "num_input_tokens_seen": 14161472, + "step": 28765 + }, + { + "epoch": 3.797017289164577, + "grad_norm": 0.000978624913841486, + "learning_rate": 3.3249608680275455e-07, + "loss": 0.0, + "num_input_tokens_seen": 14163968, + "step": 28770 + }, + { + "epoch": 3.7976771809423253, + "grad_norm": 0.0012917830608785152, + "learning_rate": 3.3215311893008744e-07, + "loss": 0.0007, + "num_input_tokens_seen": 14166592, + "step": 28775 + }, + { + "epoch": 3.798337072720074, + "grad_norm": 0.00022298976546153426, + "learning_rate": 3.318102928030848e-07, + "loss": 0.0502, + "num_input_tokens_seen": 14169344, + "step": 28780 + }, + { + "epoch": 3.7989969644978223, + "grad_norm": 0.001345164841040969, + "learning_rate": 3.3146760849450916e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14171904, + "step": 28785 + }, + { + "epoch": 3.799656856275571, + "grad_norm": 0.005270975176244974, + "learning_rate": 3.3112506607709246e-07, + "loss": 0.0, + "num_input_tokens_seen": 14174336, + "step": 28790 + }, + { + "epoch": 3.8003167480533193, + "grad_norm": 0.0006610043928958476, + "learning_rate": 3.307826656235363e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14176640, + "step": 28795 + }, + { + "epoch": 3.8009766398310676, + "grad_norm": 0.0007634887588210404, + "learning_rate": 3.304404072065139e-07, + "loss": 0.028, + "num_input_tokens_seen": 14178944, + "step": 28800 + }, + { + "epoch": 3.8016365316088163, + "grad_norm": 0.003030109917744994, + "learning_rate": 3.30098290898666e-07, + "loss": 0.0, + "num_input_tokens_seen": 14181568, + "step": 28805 + }, + { + "epoch": 3.8022964233865646, + "grad_norm": 0.0007390666869468987, + "learning_rate": 3.2975631677260505e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14184128, + "step": 28810 + }, + { + "epoch": 3.8029563151643133, + "grad_norm": 0.021578386425971985, + "learning_rate": 3.294144849009122e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14186560, + "step": 28815 + }, + { + "epoch": 3.8036162069420616, + "grad_norm": 17.728506088256836, + "learning_rate": 3.290727953561393e-07, + "loss": 0.0382, + "num_input_tokens_seen": 14189184, + "step": 28820 + }, + { + "epoch": 3.80427609871981, + "grad_norm": 0.0007388376980088651, + "learning_rate": 3.287312482108071e-07, + "loss": 0.0, + "num_input_tokens_seen": 14191616, + "step": 28825 + }, + { + "epoch": 3.804935990497558, + "grad_norm": 0.26988065242767334, + "learning_rate": 3.2838984353740593e-07, + "loss": 0.0738, + "num_input_tokens_seen": 14194432, + "step": 28830 + }, + { + "epoch": 3.805595882275307, + "grad_norm": 0.09984458237886429, + "learning_rate": 3.2804858140839764e-07, + "loss": 0.0114, + "num_input_tokens_seen": 14197120, + "step": 28835 + }, + { + "epoch": 3.806255774053055, + "grad_norm": 0.012381376698613167, + "learning_rate": 3.277074618962117e-07, + "loss": 0.0564, + "num_input_tokens_seen": 14199424, + "step": 28840 + }, + { + "epoch": 3.806915665830804, + "grad_norm": 7.645833829883486e-05, + "learning_rate": 3.2736648507324903e-07, + "loss": 0.086, + "num_input_tokens_seen": 14201792, + "step": 28845 + }, + { + "epoch": 3.807575557608552, + "grad_norm": 0.006288577802479267, + "learning_rate": 3.270256510118786e-07, + "loss": 0.0, + "num_input_tokens_seen": 14204416, + "step": 28850 + }, + { + "epoch": 3.8082354493863004, + "grad_norm": 0.0009563757921569049, + "learning_rate": 3.2668495978444065e-07, + "loss": 0.0016, + "num_input_tokens_seen": 14207104, + "step": 28855 + }, + { + "epoch": 3.808895341164049, + "grad_norm": 4.422444908414036e-05, + "learning_rate": 3.2634441146324445e-07, + "loss": 0.063, + "num_input_tokens_seen": 14209600, + "step": 28860 + }, + { + "epoch": 3.8095552329417974, + "grad_norm": 13.252848625183105, + "learning_rate": 3.26004006120568e-07, + "loss": 0.0611, + "num_input_tokens_seen": 14211840, + "step": 28865 + }, + { + "epoch": 3.810215124719546, + "grad_norm": 77.8260269165039, + "learning_rate": 3.256637438286612e-07, + "loss": 0.094, + "num_input_tokens_seen": 14214336, + "step": 28870 + }, + { + "epoch": 3.8108750164972944, + "grad_norm": 0.018460115417838097, + "learning_rate": 3.253236246597417e-07, + "loss": 0.0, + "num_input_tokens_seen": 14216640, + "step": 28875 + }, + { + "epoch": 3.8115349082750427, + "grad_norm": 0.020576654002070427, + "learning_rate": 3.2498364868599683e-07, + "loss": 0.0006, + "num_input_tokens_seen": 14219264, + "step": 28880 + }, + { + "epoch": 3.8121948000527914, + "grad_norm": 69.54541778564453, + "learning_rate": 3.2464381597958444e-07, + "loss": 0.0352, + "num_input_tokens_seen": 14221504, + "step": 28885 + }, + { + "epoch": 3.8128546918305397, + "grad_norm": 0.0036860329564660788, + "learning_rate": 3.243041266126316e-07, + "loss": 0.0, + "num_input_tokens_seen": 14223744, + "step": 28890 + }, + { + "epoch": 3.8135145836082884, + "grad_norm": 0.000731949636247009, + "learning_rate": 3.239645806572352e-07, + "loss": 0.0, + "num_input_tokens_seen": 14226304, + "step": 28895 + }, + { + "epoch": 3.8141744753860367, + "grad_norm": 0.007175610400736332, + "learning_rate": 3.2362517818546085e-07, + "loss": 0.0213, + "num_input_tokens_seen": 14228672, + "step": 28900 + }, + { + "epoch": 3.814834367163785, + "grad_norm": 0.0002437162766000256, + "learning_rate": 3.2328591926934446e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14231360, + "step": 28905 + }, + { + "epoch": 3.8154942589415337, + "grad_norm": 0.034070249646902084, + "learning_rate": 3.229468039808916e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14233856, + "step": 28910 + }, + { + "epoch": 3.816154150719282, + "grad_norm": 0.000245957839069888, + "learning_rate": 3.2260783239207644e-07, + "loss": 0.0, + "num_input_tokens_seen": 14236416, + "step": 28915 + }, + { + "epoch": 3.8168140424970307, + "grad_norm": 0.028583209961652756, + "learning_rate": 3.2226900457484354e-07, + "loss": 0.0, + "num_input_tokens_seen": 14238848, + "step": 28920 + }, + { + "epoch": 3.817473934274779, + "grad_norm": 0.30260807275772095, + "learning_rate": 3.21930320601107e-07, + "loss": 0.0217, + "num_input_tokens_seen": 14241728, + "step": 28925 + }, + { + "epoch": 3.8181338260525273, + "grad_norm": 0.0004018806212116033, + "learning_rate": 3.215917805427495e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14243904, + "step": 28930 + }, + { + "epoch": 3.818793717830276, + "grad_norm": 0.004953477066010237, + "learning_rate": 3.2125338447162386e-07, + "loss": 0.0, + "num_input_tokens_seen": 14246336, + "step": 28935 + }, + { + "epoch": 3.8194536096080243, + "grad_norm": 0.5943925380706787, + "learning_rate": 3.209151324595523e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14248512, + "step": 28940 + }, + { + "epoch": 3.820113501385773, + "grad_norm": 0.06596149504184723, + "learning_rate": 3.205770245783267e-07, + "loss": 0.0657, + "num_input_tokens_seen": 14250944, + "step": 28945 + }, + { + "epoch": 3.8207733931635213, + "grad_norm": 0.006771343760192394, + "learning_rate": 3.202390608997072e-07, + "loss": 0.1313, + "num_input_tokens_seen": 14253568, + "step": 28950 + }, + { + "epoch": 3.8214332849412695, + "grad_norm": 0.015993310138583183, + "learning_rate": 3.1990124149542465e-07, + "loss": 0.0, + "num_input_tokens_seen": 14256064, + "step": 28955 + }, + { + "epoch": 3.822093176719018, + "grad_norm": 52.393898010253906, + "learning_rate": 3.1956356643717896e-07, + "loss": 0.1208, + "num_input_tokens_seen": 14258304, + "step": 28960 + }, + { + "epoch": 3.8227530684967665, + "grad_norm": 0.0416153222322464, + "learning_rate": 3.1922603579663877e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14260608, + "step": 28965 + }, + { + "epoch": 3.823412960274515, + "grad_norm": 0.0024769201409071684, + "learning_rate": 3.188886496454426e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14263040, + "step": 28970 + }, + { + "epoch": 3.8240728520522635, + "grad_norm": 26.59957504272461, + "learning_rate": 3.185514080551986e-07, + "loss": 0.0844, + "num_input_tokens_seen": 14265344, + "step": 28975 + }, + { + "epoch": 3.824732743830012, + "grad_norm": 0.014377378858625889, + "learning_rate": 3.1821431109748344e-07, + "loss": 0.0, + "num_input_tokens_seen": 14267904, + "step": 28980 + }, + { + "epoch": 3.82539263560776, + "grad_norm": 0.003249021479859948, + "learning_rate": 3.178773588438438e-07, + "loss": 0.1095, + "num_input_tokens_seen": 14270400, + "step": 28985 + }, + { + "epoch": 3.826052527385509, + "grad_norm": 0.004426421597599983, + "learning_rate": 3.1754055136579463e-07, + "loss": 0.0, + "num_input_tokens_seen": 14272768, + "step": 28990 + }, + { + "epoch": 3.826712419163257, + "grad_norm": 0.0005180624430067837, + "learning_rate": 3.172038887348221e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14275136, + "step": 28995 + }, + { + "epoch": 3.827372310941006, + "grad_norm": 0.0005901391850784421, + "learning_rate": 3.168673710223797e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14277696, + "step": 29000 + }, + { + "epoch": 3.828032202718754, + "grad_norm": 0.0007131620077416301, + "learning_rate": 3.165309982998903e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14279872, + "step": 29005 + }, + { + "epoch": 3.8286920944965024, + "grad_norm": 0.0011300368933007121, + "learning_rate": 3.161947706387479e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14282432, + "step": 29010 + }, + { + "epoch": 3.829351986274251, + "grad_norm": 0.7849501967430115, + "learning_rate": 3.1585868811031337e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14284864, + "step": 29015 + }, + { + "epoch": 3.8300118780519994, + "grad_norm": 0.0024259083438664675, + "learning_rate": 3.155227507859185e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14287296, + "step": 29020 + }, + { + "epoch": 3.830671769829748, + "grad_norm": 0.004194983281195164, + "learning_rate": 3.1518695873686285e-07, + "loss": 0.0674, + "num_input_tokens_seen": 14289920, + "step": 29025 + }, + { + "epoch": 3.8313316616074964, + "grad_norm": 0.0013464801013469696, + "learning_rate": 3.1485131203441605e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14292416, + "step": 29030 + }, + { + "epoch": 3.8319915533852447, + "grad_norm": 14.864596366882324, + "learning_rate": 3.1451581074981726e-07, + "loss": 0.0065, + "num_input_tokens_seen": 14294592, + "step": 29035 + }, + { + "epoch": 3.8326514451629934, + "grad_norm": 0.004308775532990694, + "learning_rate": 3.141804549542735e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14297088, + "step": 29040 + }, + { + "epoch": 3.8333113369407417, + "grad_norm": 32.45550537109375, + "learning_rate": 3.138452447189617e-07, + "loss": 0.1579, + "num_input_tokens_seen": 14299712, + "step": 29045 + }, + { + "epoch": 3.8339712287184904, + "grad_norm": 0.0018099230946972966, + "learning_rate": 3.1351018011502837e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14301888, + "step": 29050 + }, + { + "epoch": 3.8346311204962387, + "grad_norm": 0.0064097810536623, + "learning_rate": 3.1317526121358785e-07, + "loss": 0.0567, + "num_input_tokens_seen": 14304256, + "step": 29055 + }, + { + "epoch": 3.835291012273987, + "grad_norm": 0.1308693140745163, + "learning_rate": 3.128404880857244e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14306752, + "step": 29060 + }, + { + "epoch": 3.8359509040517357, + "grad_norm": 0.0016533080488443375, + "learning_rate": 3.125058608024914e-07, + "loss": 0.0005, + "num_input_tokens_seen": 14309248, + "step": 29065 + }, + { + "epoch": 3.836610795829484, + "grad_norm": 0.00398173276335001, + "learning_rate": 3.1217137943491144e-07, + "loss": 0.0164, + "num_input_tokens_seen": 14311872, + "step": 29070 + }, + { + "epoch": 3.8372706876072327, + "grad_norm": 0.0020239560399204493, + "learning_rate": 3.1183704405397494e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14314368, + "step": 29075 + }, + { + "epoch": 3.837930579384981, + "grad_norm": 43.287757873535156, + "learning_rate": 3.1150285473064255e-07, + "loss": 0.0381, + "num_input_tokens_seen": 14316864, + "step": 29080 + }, + { + "epoch": 3.8385904711627292, + "grad_norm": 49.01985168457031, + "learning_rate": 3.1116881153584387e-07, + "loss": 0.0239, + "num_input_tokens_seen": 14319360, + "step": 29085 + }, + { + "epoch": 3.8392503629404775, + "grad_norm": 0.00048302547656930983, + "learning_rate": 3.108349145404764e-07, + "loss": 0.0, + "num_input_tokens_seen": 14322048, + "step": 29090 + }, + { + "epoch": 3.8399102547182262, + "grad_norm": 0.0018678263295441866, + "learning_rate": 3.1050116381540793e-07, + "loss": 0.0382, + "num_input_tokens_seen": 14324480, + "step": 29095 + }, + { + "epoch": 3.8405701464959745, + "grad_norm": 0.43559908866882324, + "learning_rate": 3.101675594314747e-07, + "loss": 0.0021, + "num_input_tokens_seen": 14326976, + "step": 29100 + }, + { + "epoch": 3.8412300382737232, + "grad_norm": 0.14082865417003632, + "learning_rate": 3.098341014594813e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14329600, + "step": 29105 + }, + { + "epoch": 3.8418899300514715, + "grad_norm": 0.0021376083604991436, + "learning_rate": 3.0950078997020214e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14331968, + "step": 29110 + }, + { + "epoch": 3.84254982182922, + "grad_norm": 0.00955208856612444, + "learning_rate": 3.0916762503438e-07, + "loss": 0.0719, + "num_input_tokens_seen": 14334720, + "step": 29115 + }, + { + "epoch": 3.8432097136069685, + "grad_norm": 0.024048855528235435, + "learning_rate": 3.0883460672272724e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14337088, + "step": 29120 + }, + { + "epoch": 3.843869605384717, + "grad_norm": 0.05813174322247505, + "learning_rate": 3.0850173510592415e-07, + "loss": 0.001, + "num_input_tokens_seen": 14339264, + "step": 29125 + }, + { + "epoch": 3.8445294971624655, + "grad_norm": 0.013413142412900925, + "learning_rate": 3.0816901025461974e-07, + "loss": 0.0442, + "num_input_tokens_seen": 14341632, + "step": 29130 + }, + { + "epoch": 3.845189388940214, + "grad_norm": 0.003118762979283929, + "learning_rate": 3.0783643223943367e-07, + "loss": 0.0, + "num_input_tokens_seen": 14343872, + "step": 29135 + }, + { + "epoch": 3.845849280717962, + "grad_norm": 0.00037634363980032504, + "learning_rate": 3.075040011309522e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14346240, + "step": 29140 + }, + { + "epoch": 3.846509172495711, + "grad_norm": 0.003813160816207528, + "learning_rate": 3.0717171699973197e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14348544, + "step": 29145 + }, + { + "epoch": 3.847169064273459, + "grad_norm": 0.03408288210630417, + "learning_rate": 3.068395799162976e-07, + "loss": 0.0, + "num_input_tokens_seen": 14350784, + "step": 29150 + }, + { + "epoch": 3.847828956051208, + "grad_norm": 0.004677819553762674, + "learning_rate": 3.0650758995114335e-07, + "loss": 0.0, + "num_input_tokens_seen": 14353408, + "step": 29155 + }, + { + "epoch": 3.848488847828956, + "grad_norm": 0.051624976098537445, + "learning_rate": 3.061757471747313e-07, + "loss": 0.0, + "num_input_tokens_seen": 14355712, + "step": 29160 + }, + { + "epoch": 3.8491487396067043, + "grad_norm": 0.0331740602850914, + "learning_rate": 3.058440516574918e-07, + "loss": 0.0089, + "num_input_tokens_seen": 14358016, + "step": 29165 + }, + { + "epoch": 3.849808631384453, + "grad_norm": 0.0008157134870998561, + "learning_rate": 3.055125034698265e-07, + "loss": 0.0337, + "num_input_tokens_seen": 14360576, + "step": 29170 + }, + { + "epoch": 3.8504685231622013, + "grad_norm": 0.02268756367266178, + "learning_rate": 3.051811026821027e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14363008, + "step": 29175 + }, + { + "epoch": 3.85112841493995, + "grad_norm": 0.0016416346188634634, + "learning_rate": 3.04849849364659e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14365376, + "step": 29180 + }, + { + "epoch": 3.8517883067176983, + "grad_norm": 0.0015501509187743068, + "learning_rate": 3.045187435878003e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14367872, + "step": 29185 + }, + { + "epoch": 3.8524481984954466, + "grad_norm": 0.0018056805711239576, + "learning_rate": 3.041877854218021e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14370304, + "step": 29190 + }, + { + "epoch": 3.8531080902731953, + "grad_norm": 0.000659221550449729, + "learning_rate": 3.0385697493690807e-07, + "loss": 0.0, + "num_input_tokens_seen": 14372928, + "step": 29195 + }, + { + "epoch": 3.8537679820509436, + "grad_norm": 0.19510525465011597, + "learning_rate": 3.0352631220332945e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14375360, + "step": 29200 + }, + { + "epoch": 3.8544278738286923, + "grad_norm": 11.538311958312988, + "learning_rate": 3.031957972912482e-07, + "loss": 0.1616, + "num_input_tokens_seen": 14377920, + "step": 29205 + }, + { + "epoch": 3.8550877656064406, + "grad_norm": 0.0012093938421458006, + "learning_rate": 3.028654302708131e-07, + "loss": 0.0, + "num_input_tokens_seen": 14380352, + "step": 29210 + }, + { + "epoch": 3.855747657384189, + "grad_norm": 1.0782610177993774, + "learning_rate": 3.025352112121419e-07, + "loss": 0.0007, + "num_input_tokens_seen": 14382912, + "step": 29215 + }, + { + "epoch": 3.856407549161937, + "grad_norm": 4.327111309976317e-05, + "learning_rate": 3.022051401853214e-07, + "loss": 0.0, + "num_input_tokens_seen": 14385344, + "step": 29220 + }, + { + "epoch": 3.857067440939686, + "grad_norm": 0.07471462339162827, + "learning_rate": 3.018752172604069e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14387840, + "step": 29225 + }, + { + "epoch": 3.857727332717434, + "grad_norm": 0.00025852222461253405, + "learning_rate": 3.015454425074224e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14390016, + "step": 29230 + }, + { + "epoch": 3.858387224495183, + "grad_norm": 0.016328802332282066, + "learning_rate": 3.0121581599635973e-07, + "loss": 0.0, + "num_input_tokens_seen": 14392384, + "step": 29235 + }, + { + "epoch": 3.859047116272931, + "grad_norm": 0.0008336760802194476, + "learning_rate": 3.0088633779717975e-07, + "loss": 0.0, + "num_input_tokens_seen": 14394752, + "step": 29240 + }, + { + "epoch": 3.8597070080506795, + "grad_norm": 0.0012462205486372113, + "learning_rate": 3.0055700797981244e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14397184, + "step": 29245 + }, + { + "epoch": 3.860366899828428, + "grad_norm": 0.005022898782044649, + "learning_rate": 3.002278266141548e-07, + "loss": 0.0, + "num_input_tokens_seen": 14399744, + "step": 29250 + }, + { + "epoch": 3.8610267916061765, + "grad_norm": 0.0011758505133911967, + "learning_rate": 2.9989879377007375e-07, + "loss": 0.0, + "num_input_tokens_seen": 14402112, + "step": 29255 + }, + { + "epoch": 3.861686683383925, + "grad_norm": 0.0008672875701449811, + "learning_rate": 2.995699095174041e-07, + "loss": 0.0, + "num_input_tokens_seen": 14404544, + "step": 29260 + }, + { + "epoch": 3.8623465751616735, + "grad_norm": 0.037034500390291214, + "learning_rate": 2.9924117392594893e-07, + "loss": 0.0, + "num_input_tokens_seen": 14406720, + "step": 29265 + }, + { + "epoch": 3.8630064669394217, + "grad_norm": 0.0004481837968342006, + "learning_rate": 2.9891258706547997e-07, + "loss": 0.0, + "num_input_tokens_seen": 14409472, + "step": 29270 + }, + { + "epoch": 3.8636663587171705, + "grad_norm": 0.015096590854227543, + "learning_rate": 2.9858414900573757e-07, + "loss": 0.0366, + "num_input_tokens_seen": 14411904, + "step": 29275 + }, + { + "epoch": 3.8643262504949187, + "grad_norm": 0.0008525225566700101, + "learning_rate": 2.9825585981643064e-07, + "loss": 0.0411, + "num_input_tokens_seen": 14414400, + "step": 29280 + }, + { + "epoch": 3.8649861422726675, + "grad_norm": 0.030953820794820786, + "learning_rate": 2.9792771956723537e-07, + "loss": 0.0, + "num_input_tokens_seen": 14416896, + "step": 29285 + }, + { + "epoch": 3.8656460340504157, + "grad_norm": 0.0005172466626390815, + "learning_rate": 2.9759972832779776e-07, + "loss": 0.0, + "num_input_tokens_seen": 14419328, + "step": 29290 + }, + { + "epoch": 3.866305925828164, + "grad_norm": 1.1345044374465942, + "learning_rate": 2.972718861677317e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14421696, + "step": 29295 + }, + { + "epoch": 3.8669658176059127, + "grad_norm": 0.002292012795805931, + "learning_rate": 2.969441931566188e-07, + "loss": 0.0, + "num_input_tokens_seen": 14423936, + "step": 29300 + }, + { + "epoch": 3.867625709383661, + "grad_norm": 0.00030207863892428577, + "learning_rate": 2.9661664936400964e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14426432, + "step": 29305 + }, + { + "epoch": 3.8682856011614097, + "grad_norm": 0.0012762520927935839, + "learning_rate": 2.9628925485942357e-07, + "loss": 0.0, + "num_input_tokens_seen": 14428672, + "step": 29310 + }, + { + "epoch": 3.868945492939158, + "grad_norm": 0.014591632410883904, + "learning_rate": 2.9596200971234687e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14431040, + "step": 29315 + }, + { + "epoch": 3.8696053847169063, + "grad_norm": 0.00019203654665034264, + "learning_rate": 2.956349139922357e-07, + "loss": 0.0657, + "num_input_tokens_seen": 14433472, + "step": 29320 + }, + { + "epoch": 3.870265276494655, + "grad_norm": 0.06158406659960747, + "learning_rate": 2.9530796776851283e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14435968, + "step": 29325 + }, + { + "epoch": 3.8709251682724033, + "grad_norm": 0.03689207881689072, + "learning_rate": 2.9498117111057155e-07, + "loss": 0.0, + "num_input_tokens_seen": 14438336, + "step": 29330 + }, + { + "epoch": 3.871585060050152, + "grad_norm": 0.04720371589064598, + "learning_rate": 2.9465452408777126e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14440896, + "step": 29335 + }, + { + "epoch": 3.8722449518279003, + "grad_norm": 0.0012083809124305844, + "learning_rate": 2.943280267694399e-07, + "loss": 0.0, + "num_input_tokens_seen": 14443392, + "step": 29340 + }, + { + "epoch": 3.8729048436056486, + "grad_norm": 0.00017764570657163858, + "learning_rate": 2.940016792248754e-07, + "loss": 0.0009, + "num_input_tokens_seen": 14445952, + "step": 29345 + }, + { + "epoch": 3.873564735383397, + "grad_norm": 0.0009236446931026876, + "learning_rate": 2.936754815233417e-07, + "loss": 0.0611, + "num_input_tokens_seen": 14448256, + "step": 29350 + }, + { + "epoch": 3.8742246271611456, + "grad_norm": 0.000315178360324353, + "learning_rate": 2.933494337340726e-07, + "loss": 0.0, + "num_input_tokens_seen": 14450624, + "step": 29355 + }, + { + "epoch": 3.8748845189388943, + "grad_norm": 0.005778376944363117, + "learning_rate": 2.930235359262687e-07, + "loss": 0.0, + "num_input_tokens_seen": 14453056, + "step": 29360 + }, + { + "epoch": 3.8755444107166426, + "grad_norm": 0.007840816862881184, + "learning_rate": 2.9269778816909985e-07, + "loss": 0.0, + "num_input_tokens_seen": 14455616, + "step": 29365 + }, + { + "epoch": 3.876204302494391, + "grad_norm": 0.0008592153899371624, + "learning_rate": 2.9237219053170383e-07, + "loss": 0.0, + "num_input_tokens_seen": 14457792, + "step": 29370 + }, + { + "epoch": 3.876864194272139, + "grad_norm": 0.0005324503872543573, + "learning_rate": 2.920467430831858e-07, + "loss": 0.0, + "num_input_tokens_seen": 14460096, + "step": 29375 + }, + { + "epoch": 3.877524086049888, + "grad_norm": 0.12947431206703186, + "learning_rate": 2.917214458926199e-07, + "loss": 0.0019, + "num_input_tokens_seen": 14463040, + "step": 29380 + }, + { + "epoch": 3.878183977827636, + "grad_norm": 0.001886560581624508, + "learning_rate": 2.913962990290486e-07, + "loss": 0.0, + "num_input_tokens_seen": 14465472, + "step": 29385 + }, + { + "epoch": 3.878843869605385, + "grad_norm": 0.0008303842623718083, + "learning_rate": 2.910713025614812e-07, + "loss": 0.0891, + "num_input_tokens_seen": 14467968, + "step": 29390 + }, + { + "epoch": 3.879503761383133, + "grad_norm": 0.003242628648877144, + "learning_rate": 2.9074645655889604e-07, + "loss": 0.0, + "num_input_tokens_seen": 14470656, + "step": 29395 + }, + { + "epoch": 3.8801636531608814, + "grad_norm": 0.0005708981771022081, + "learning_rate": 2.904217610902396e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14472704, + "step": 29400 + }, + { + "epoch": 3.88082354493863, + "grad_norm": 0.0017631093505769968, + "learning_rate": 2.900972162244263e-07, + "loss": 0.0, + "num_input_tokens_seen": 14475136, + "step": 29405 + }, + { + "epoch": 3.8814834367163784, + "grad_norm": 15.109874725341797, + "learning_rate": 2.897728220303378e-07, + "loss": 0.0491, + "num_input_tokens_seen": 14477504, + "step": 29410 + }, + { + "epoch": 3.882143328494127, + "grad_norm": 0.0022156566847115755, + "learning_rate": 2.894485785768248e-07, + "loss": 0.0239, + "num_input_tokens_seen": 14479936, + "step": 29415 + }, + { + "epoch": 3.8828032202718754, + "grad_norm": 0.008582009002566338, + "learning_rate": 2.891244859327059e-07, + "loss": 0.1459, + "num_input_tokens_seen": 14482368, + "step": 29420 + }, + { + "epoch": 3.8834631120496237, + "grad_norm": 0.042062871158123016, + "learning_rate": 2.888005441667668e-07, + "loss": 0.0, + "num_input_tokens_seen": 14484736, + "step": 29425 + }, + { + "epoch": 3.8841230038273724, + "grad_norm": 144.55355834960938, + "learning_rate": 2.88476753347762e-07, + "loss": 0.0049, + "num_input_tokens_seen": 14487296, + "step": 29430 + }, + { + "epoch": 3.8847828956051207, + "grad_norm": 0.0020836309995502234, + "learning_rate": 2.881531135444143e-07, + "loss": 0.0, + "num_input_tokens_seen": 14489344, + "step": 29435 + }, + { + "epoch": 3.8854427873828694, + "grad_norm": 0.7218878865242004, + "learning_rate": 2.878296248254131e-07, + "loss": 0.0386, + "num_input_tokens_seen": 14492096, + "step": 29440 + }, + { + "epoch": 3.8861026791606177, + "grad_norm": 0.2119138091802597, + "learning_rate": 2.8750628725941685e-07, + "loss": 0.0065, + "num_input_tokens_seen": 14494720, + "step": 29445 + }, + { + "epoch": 3.886762570938366, + "grad_norm": 0.0037669632583856583, + "learning_rate": 2.8718310091505173e-07, + "loss": 0.0, + "num_input_tokens_seen": 14497280, + "step": 29450 + }, + { + "epoch": 3.8874224627161147, + "grad_norm": 0.08131466060876846, + "learning_rate": 2.8686006586091183e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14499904, + "step": 29455 + }, + { + "epoch": 3.888082354493863, + "grad_norm": 10.323993682861328, + "learning_rate": 2.8653718216555854e-07, + "loss": 0.0854, + "num_input_tokens_seen": 14502784, + "step": 29460 + }, + { + "epoch": 3.8887422462716117, + "grad_norm": 0.00036318288766779006, + "learning_rate": 2.8621444989752184e-07, + "loss": 0.0, + "num_input_tokens_seen": 14504960, + "step": 29465 + }, + { + "epoch": 3.88940213804936, + "grad_norm": 0.0004923275555483997, + "learning_rate": 2.858918691252997e-07, + "loss": 0.0, + "num_input_tokens_seen": 14507520, + "step": 29470 + }, + { + "epoch": 3.8900620298271082, + "grad_norm": 0.0016408892115578055, + "learning_rate": 2.855694399173568e-07, + "loss": 0.0, + "num_input_tokens_seen": 14510016, + "step": 29475 + }, + { + "epoch": 3.890721921604857, + "grad_norm": 0.01022788044065237, + "learning_rate": 2.8524716234212684e-07, + "loss": 0.0009, + "num_input_tokens_seen": 14512512, + "step": 29480 + }, + { + "epoch": 3.8913818133826052, + "grad_norm": 0.0026390019338577986, + "learning_rate": 2.849250364680108e-07, + "loss": 0.0, + "num_input_tokens_seen": 14514624, + "step": 29485 + }, + { + "epoch": 3.892041705160354, + "grad_norm": 0.002436618087813258, + "learning_rate": 2.846030623633778e-07, + "loss": 0.0, + "num_input_tokens_seen": 14516928, + "step": 29490 + }, + { + "epoch": 3.8927015969381022, + "grad_norm": 0.022925982251763344, + "learning_rate": 2.842812400965645e-07, + "loss": 0.0, + "num_input_tokens_seen": 14519296, + "step": 29495 + }, + { + "epoch": 3.8933614887158505, + "grad_norm": 23.102983474731445, + "learning_rate": 2.839595697358744e-07, + "loss": 0.1298, + "num_input_tokens_seen": 14521728, + "step": 29500 + }, + { + "epoch": 3.894021380493599, + "grad_norm": 0.002253438113257289, + "learning_rate": 2.836380513495812e-07, + "loss": 0.0023, + "num_input_tokens_seen": 14524224, + "step": 29505 + }, + { + "epoch": 3.8946812722713475, + "grad_norm": 0.0016631442122161388, + "learning_rate": 2.8331668500592374e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14526912, + "step": 29510 + }, + { + "epoch": 3.895341164049096, + "grad_norm": 0.004704699851572514, + "learning_rate": 2.829954707731104e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14529280, + "step": 29515 + }, + { + "epoch": 3.8960010558268445, + "grad_norm": 0.000636607815977186, + "learning_rate": 2.826744087193159e-07, + "loss": 0.0008, + "num_input_tokens_seen": 14531776, + "step": 29520 + }, + { + "epoch": 3.896660947604593, + "grad_norm": 0.02008168026804924, + "learning_rate": 2.823534989126838e-07, + "loss": 0.0009, + "num_input_tokens_seen": 14533952, + "step": 29525 + }, + { + "epoch": 3.897320839382341, + "grad_norm": 0.0013008936075493693, + "learning_rate": 2.820327414213249e-07, + "loss": 0.0, + "num_input_tokens_seen": 14536128, + "step": 29530 + }, + { + "epoch": 3.89798073116009, + "grad_norm": 0.2298823744058609, + "learning_rate": 2.8171213631331714e-07, + "loss": 0.024, + "num_input_tokens_seen": 14539072, + "step": 29535 + }, + { + "epoch": 3.898640622937838, + "grad_norm": 0.1926964521408081, + "learning_rate": 2.813916836567074e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14541632, + "step": 29540 + }, + { + "epoch": 3.899300514715587, + "grad_norm": 0.0007152509060688317, + "learning_rate": 2.810713835195092e-07, + "loss": 0.2078, + "num_input_tokens_seen": 14543680, + "step": 29545 + }, + { + "epoch": 3.899960406493335, + "grad_norm": 0.006173610687255859, + "learning_rate": 2.807512359697034e-07, + "loss": 0.0, + "num_input_tokens_seen": 14546048, + "step": 29550 + }, + { + "epoch": 3.9006202982710834, + "grad_norm": 0.11755349487066269, + "learning_rate": 2.8043124107523943e-07, + "loss": 0.0412, + "num_input_tokens_seen": 14548480, + "step": 29555 + }, + { + "epoch": 3.901280190048832, + "grad_norm": 0.4096464216709137, + "learning_rate": 2.801113989040338e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14550976, + "step": 29560 + }, + { + "epoch": 3.9019400818265804, + "grad_norm": 0.0008820955990813673, + "learning_rate": 2.7979170952397103e-07, + "loss": 0.0, + "num_input_tokens_seen": 14553600, + "step": 29565 + }, + { + "epoch": 3.902599973604329, + "grad_norm": 0.002521621063351631, + "learning_rate": 2.7947217300290225e-07, + "loss": 0.0056, + "num_input_tokens_seen": 14556160, + "step": 29570 + }, + { + "epoch": 3.9032598653820774, + "grad_norm": 0.042136672884225845, + "learning_rate": 2.791527894086472e-07, + "loss": 0.0337, + "num_input_tokens_seen": 14558912, + "step": 29575 + }, + { + "epoch": 3.9039197571598256, + "grad_norm": 0.00030493823578581214, + "learning_rate": 2.7883355880899286e-07, + "loss": 0.002, + "num_input_tokens_seen": 14561408, + "step": 29580 + }, + { + "epoch": 3.9045796489375744, + "grad_norm": 0.6785450577735901, + "learning_rate": 2.78514481271693e-07, + "loss": 0.0005, + "num_input_tokens_seen": 14563648, + "step": 29585 + }, + { + "epoch": 3.9052395407153226, + "grad_norm": 0.0023029835429042578, + "learning_rate": 2.7819555686447004e-07, + "loss": 0.0, + "num_input_tokens_seen": 14565888, + "step": 29590 + }, + { + "epoch": 3.9058994324930714, + "grad_norm": 0.0056864372454583645, + "learning_rate": 2.7787678565501347e-07, + "loss": 0.008, + "num_input_tokens_seen": 14568384, + "step": 29595 + }, + { + "epoch": 3.9065593242708196, + "grad_norm": 0.0006198826595209539, + "learning_rate": 2.7755816771097963e-07, + "loss": 0.0, + "num_input_tokens_seen": 14570432, + "step": 29600 + }, + { + "epoch": 3.907219216048568, + "grad_norm": 0.0022146229166537523, + "learning_rate": 2.7723970309999324e-07, + "loss": 0.0154, + "num_input_tokens_seen": 14572864, + "step": 29605 + }, + { + "epoch": 3.9078791078263166, + "grad_norm": 0.011892078444361687, + "learning_rate": 2.7692139188964594e-07, + "loss": 0.0, + "num_input_tokens_seen": 14575104, + "step": 29610 + }, + { + "epoch": 3.908538999604065, + "grad_norm": 68.65377044677734, + "learning_rate": 2.766032341474975e-07, + "loss": 0.0083, + "num_input_tokens_seen": 14577664, + "step": 29615 + }, + { + "epoch": 3.9091988913818136, + "grad_norm": 0.418712854385376, + "learning_rate": 2.762852299410738e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14580352, + "step": 29620 + }, + { + "epoch": 3.909858783159562, + "grad_norm": 0.0004645238514058292, + "learning_rate": 2.759673793378694e-07, + "loss": 0.0, + "num_input_tokens_seen": 14582784, + "step": 29625 + }, + { + "epoch": 3.91051867493731, + "grad_norm": 0.008189682848751545, + "learning_rate": 2.7564968240534594e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14585216, + "step": 29630 + }, + { + "epoch": 3.9111785667150585, + "grad_norm": 0.01741664856672287, + "learning_rate": 2.753321392109318e-07, + "loss": 0.0611, + "num_input_tokens_seen": 14587584, + "step": 29635 + }, + { + "epoch": 3.911838458492807, + "grad_norm": 0.04173688963055611, + "learning_rate": 2.7501474982202345e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14589952, + "step": 29640 + }, + { + "epoch": 3.9124983502705555, + "grad_norm": 0.05172654241323471, + "learning_rate": 2.7469751430598486e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14592320, + "step": 29645 + }, + { + "epoch": 3.913158242048304, + "grad_norm": 0.009400231763720512, + "learning_rate": 2.743804327301462e-07, + "loss": 0.0266, + "num_input_tokens_seen": 14594560, + "step": 29650 + }, + { + "epoch": 3.9138181338260525, + "grad_norm": 0.01464253943413496, + "learning_rate": 2.7406350516180666e-07, + "loss": 0.0725, + "num_input_tokens_seen": 14597248, + "step": 29655 + }, + { + "epoch": 3.9144780256038008, + "grad_norm": 0.0002989550703205168, + "learning_rate": 2.7374673166823057e-07, + "loss": 0.0, + "num_input_tokens_seen": 14599488, + "step": 29660 + }, + { + "epoch": 3.9151379173815495, + "grad_norm": 0.0020613304805010557, + "learning_rate": 2.7343011231665227e-07, + "loss": 0.0, + "num_input_tokens_seen": 14601728, + "step": 29665 + }, + { + "epoch": 3.9157978091592978, + "grad_norm": 0.001036074128933251, + "learning_rate": 2.731136471742712e-07, + "loss": 0.0, + "num_input_tokens_seen": 14604160, + "step": 29670 + }, + { + "epoch": 3.9164577009370465, + "grad_norm": 0.0002730927080847323, + "learning_rate": 2.7279733630825417e-07, + "loss": 0.0, + "num_input_tokens_seen": 14606592, + "step": 29675 + }, + { + "epoch": 3.9171175927147948, + "grad_norm": 0.05069692060351372, + "learning_rate": 2.7248117978573725e-07, + "loss": 0.001, + "num_input_tokens_seen": 14609024, + "step": 29680 + }, + { + "epoch": 3.917777484492543, + "grad_norm": 0.0025865097995847464, + "learning_rate": 2.721651776738212e-07, + "loss": 0.1096, + "num_input_tokens_seen": 14611392, + "step": 29685 + }, + { + "epoch": 3.9184373762702918, + "grad_norm": 0.5897282361984253, + "learning_rate": 2.71849330039576e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14613760, + "step": 29690 + }, + { + "epoch": 3.91909726804804, + "grad_norm": 0.007200206164270639, + "learning_rate": 2.715336369500374e-07, + "loss": 0.0, + "num_input_tokens_seen": 14616128, + "step": 29695 + }, + { + "epoch": 3.9197571598257888, + "grad_norm": 0.001574324443936348, + "learning_rate": 2.712180984722091e-07, + "loss": 0.0441, + "num_input_tokens_seen": 14618816, + "step": 29700 + }, + { + "epoch": 3.920417051603537, + "grad_norm": 0.1959327608346939, + "learning_rate": 2.7090271467306235e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14621184, + "step": 29705 + }, + { + "epoch": 3.9210769433812853, + "grad_norm": 0.036899324506521225, + "learning_rate": 2.705874856195344e-07, + "loss": 0.0, + "num_input_tokens_seen": 14623936, + "step": 29710 + }, + { + "epoch": 3.921736835159034, + "grad_norm": 0.007074782159179449, + "learning_rate": 2.702724113785305e-07, + "loss": 0.0797, + "num_input_tokens_seen": 14626176, + "step": 29715 + }, + { + "epoch": 3.9223967269367823, + "grad_norm": 0.3928063213825226, + "learning_rate": 2.6995749201692353e-07, + "loss": 0.0506, + "num_input_tokens_seen": 14628608, + "step": 29720 + }, + { + "epoch": 3.923056618714531, + "grad_norm": 0.0009985992219299078, + "learning_rate": 2.696427276015518e-07, + "loss": 0.0011, + "num_input_tokens_seen": 14631424, + "step": 29725 + }, + { + "epoch": 3.9237165104922793, + "grad_norm": 0.00414725998416543, + "learning_rate": 2.693281181992225e-07, + "loss": 0.0049, + "num_input_tokens_seen": 14633792, + "step": 29730 + }, + { + "epoch": 3.9243764022700276, + "grad_norm": 0.0017239763401448727, + "learning_rate": 2.6901366387670885e-07, + "loss": 0.0009, + "num_input_tokens_seen": 14636352, + "step": 29735 + }, + { + "epoch": 3.9250362940477763, + "grad_norm": 0.0003734455385711044, + "learning_rate": 2.6869936470075214e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14638784, + "step": 29740 + }, + { + "epoch": 3.9256961858255246, + "grad_norm": 0.0018246241379529238, + "learning_rate": 2.6838522073805915e-07, + "loss": 0.0, + "num_input_tokens_seen": 14641408, + "step": 29745 + }, + { + "epoch": 3.9263560776032733, + "grad_norm": 0.0012204793747514486, + "learning_rate": 2.6807123205530523e-07, + "loss": 0.0, + "num_input_tokens_seen": 14643712, + "step": 29750 + }, + { + "epoch": 3.9270159693810216, + "grad_norm": 17.668453216552734, + "learning_rate": 2.677573987191323e-07, + "loss": 0.0412, + "num_input_tokens_seen": 14646336, + "step": 29755 + }, + { + "epoch": 3.92767586115877, + "grad_norm": 0.19213633239269257, + "learning_rate": 2.674437207961487e-07, + "loss": 0.0008, + "num_input_tokens_seen": 14648832, + "step": 29760 + }, + { + "epoch": 3.928335752936518, + "grad_norm": 0.007988156750798225, + "learning_rate": 2.671301983529307e-07, + "loss": 0.0, + "num_input_tokens_seen": 14651136, + "step": 29765 + }, + { + "epoch": 3.928995644714267, + "grad_norm": 7.79569149017334, + "learning_rate": 2.668168314560213e-07, + "loss": 0.0823, + "num_input_tokens_seen": 14653568, + "step": 29770 + }, + { + "epoch": 3.929655536492015, + "grad_norm": 0.01193348877131939, + "learning_rate": 2.6650362017192986e-07, + "loss": 0.0239, + "num_input_tokens_seen": 14656000, + "step": 29775 + }, + { + "epoch": 3.930315428269764, + "grad_norm": 0.03314467892050743, + "learning_rate": 2.661905645671335e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14658432, + "step": 29780 + }, + { + "epoch": 3.930975320047512, + "grad_norm": 0.0030468301847577095, + "learning_rate": 2.658776647080759e-07, + "loss": 0.0035, + "num_input_tokens_seen": 14661056, + "step": 29785 + }, + { + "epoch": 3.9316352118252604, + "grad_norm": 0.005730305332690477, + "learning_rate": 2.655649206611683e-07, + "loss": 0.0337, + "num_input_tokens_seen": 14663360, + "step": 29790 + }, + { + "epoch": 3.932295103603009, + "grad_norm": 0.00034679798409342766, + "learning_rate": 2.652523324927876e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14665856, + "step": 29795 + }, + { + "epoch": 3.9329549953807574, + "grad_norm": 0.0049493578262627125, + "learning_rate": 2.649399002692786e-07, + "loss": 0.0, + "num_input_tokens_seen": 14668224, + "step": 29800 + }, + { + "epoch": 3.933614887158506, + "grad_norm": 0.00087630475172773, + "learning_rate": 2.6462762405695314e-07, + "loss": 0.0, + "num_input_tokens_seen": 14670464, + "step": 29805 + }, + { + "epoch": 3.9342747789362544, + "grad_norm": 11.067977905273438, + "learning_rate": 2.6431550392208924e-07, + "loss": 0.0352, + "num_input_tokens_seen": 14673088, + "step": 29810 + }, + { + "epoch": 3.9349346707140027, + "grad_norm": 0.0014208820648491383, + "learning_rate": 2.6400353993093205e-07, + "loss": 0.0, + "num_input_tokens_seen": 14675584, + "step": 29815 + }, + { + "epoch": 3.9355945624917514, + "grad_norm": 0.014897317625582218, + "learning_rate": 2.636917321496939e-07, + "loss": 0.0, + "num_input_tokens_seen": 14678336, + "step": 29820 + }, + { + "epoch": 3.9362544542694997, + "grad_norm": 0.003759880783036351, + "learning_rate": 2.6338008064455395e-07, + "loss": 0.0, + "num_input_tokens_seen": 14680896, + "step": 29825 + }, + { + "epoch": 3.9369143460472484, + "grad_norm": 16.379169464111328, + "learning_rate": 2.6306858548165776e-07, + "loss": 0.0008, + "num_input_tokens_seen": 14683200, + "step": 29830 + }, + { + "epoch": 3.9375742378249967, + "grad_norm": 0.0020549760665744543, + "learning_rate": 2.627572467271172e-07, + "loss": 0.0, + "num_input_tokens_seen": 14685760, + "step": 29835 + }, + { + "epoch": 3.938234129602745, + "grad_norm": 0.054575130343437195, + "learning_rate": 2.62446064447013e-07, + "loss": 0.0, + "num_input_tokens_seen": 14688256, + "step": 29840 + }, + { + "epoch": 3.9388940213804937, + "grad_norm": 4.02533114538528e-05, + "learning_rate": 2.621350387073903e-07, + "loss": 0.0617, + "num_input_tokens_seen": 14690496, + "step": 29845 + }, + { + "epoch": 3.939553913158242, + "grad_norm": 0.005808962509036064, + "learning_rate": 2.618241695742628e-07, + "loss": 0.0, + "num_input_tokens_seen": 14692992, + "step": 29850 + }, + { + "epoch": 3.9402138049359907, + "grad_norm": 0.005495645571500063, + "learning_rate": 2.615134571136095e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14695168, + "step": 29855 + }, + { + "epoch": 3.940873696713739, + "grad_norm": 0.0014869053848087788, + "learning_rate": 2.6120290139137726e-07, + "loss": 0.1172, + "num_input_tokens_seen": 14697664, + "step": 29860 + }, + { + "epoch": 3.9415335884914873, + "grad_norm": 0.010261405259370804, + "learning_rate": 2.608925024734795e-07, + "loss": 0.0, + "num_input_tokens_seen": 14700480, + "step": 29865 + }, + { + "epoch": 3.942193480269236, + "grad_norm": 0.06723063439130783, + "learning_rate": 2.605822604257953e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14703296, + "step": 29870 + }, + { + "epoch": 3.9428533720469843, + "grad_norm": 0.002988782711327076, + "learning_rate": 2.6027217531417256e-07, + "loss": 0.0, + "num_input_tokens_seen": 14705408, + "step": 29875 + }, + { + "epoch": 3.943513263824733, + "grad_norm": 0.14848151803016663, + "learning_rate": 2.5996224720442394e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14707712, + "step": 29880 + }, + { + "epoch": 3.9441731556024813, + "grad_norm": 0.000740960007533431, + "learning_rate": 2.59652476162329e-07, + "loss": 0.0, + "num_input_tokens_seen": 14710208, + "step": 29885 + }, + { + "epoch": 3.9448330473802296, + "grad_norm": 0.002873439807444811, + "learning_rate": 2.593428622536349e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14712640, + "step": 29890 + }, + { + "epoch": 3.945492939157978, + "grad_norm": 0.0018458872800692916, + "learning_rate": 2.5903340554405485e-07, + "loss": 0.061, + "num_input_tokens_seen": 14715136, + "step": 29895 + }, + { + "epoch": 3.9461528309357266, + "grad_norm": 0.01930380053818226, + "learning_rate": 2.587241060992691e-07, + "loss": 0.0, + "num_input_tokens_seen": 14717824, + "step": 29900 + }, + { + "epoch": 3.946812722713475, + "grad_norm": 0.007538790814578533, + "learning_rate": 2.5841496398492366e-07, + "loss": 0.0, + "num_input_tokens_seen": 14720320, + "step": 29905 + }, + { + "epoch": 3.9474726144912236, + "grad_norm": 0.02111750654876232, + "learning_rate": 2.5810597926663205e-07, + "loss": 0.0, + "num_input_tokens_seen": 14722688, + "step": 29910 + }, + { + "epoch": 3.948132506268972, + "grad_norm": 46.01771545410156, + "learning_rate": 2.577971520099741e-07, + "loss": 0.1273, + "num_input_tokens_seen": 14724928, + "step": 29915 + }, + { + "epoch": 3.94879239804672, + "grad_norm": 0.007815618999302387, + "learning_rate": 2.574884822804958e-07, + "loss": 0.0, + "num_input_tokens_seen": 14727360, + "step": 29920 + }, + { + "epoch": 3.949452289824469, + "grad_norm": 0.0002846256538759917, + "learning_rate": 2.571799701437103e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14729856, + "step": 29925 + }, + { + "epoch": 3.950112181602217, + "grad_norm": 0.369070440530777, + "learning_rate": 2.568716156650974e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14732224, + "step": 29930 + }, + { + "epoch": 3.950772073379966, + "grad_norm": 0.002331068040803075, + "learning_rate": 2.5656341891010236e-07, + "loss": 0.028, + "num_input_tokens_seen": 14734912, + "step": 29935 + }, + { + "epoch": 3.951431965157714, + "grad_norm": 0.0062283482402563095, + "learning_rate": 2.5625537994413825e-07, + "loss": 0.0, + "num_input_tokens_seen": 14737216, + "step": 29940 + }, + { + "epoch": 3.9520918569354624, + "grad_norm": 2.5231564044952393, + "learning_rate": 2.559474988325838e-07, + "loss": 0.0523, + "num_input_tokens_seen": 14739648, + "step": 29945 + }, + { + "epoch": 3.952751748713211, + "grad_norm": 19.905338287353516, + "learning_rate": 2.556397756407852e-07, + "loss": 0.1603, + "num_input_tokens_seen": 14742400, + "step": 29950 + }, + { + "epoch": 3.9534116404909594, + "grad_norm": 0.009885110892355442, + "learning_rate": 2.5533221043405364e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14744832, + "step": 29955 + }, + { + "epoch": 3.954071532268708, + "grad_norm": 0.005242446903139353, + "learning_rate": 2.5502480327766785e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14747392, + "step": 29960 + }, + { + "epoch": 3.9547314240464564, + "grad_norm": 12.402752876281738, + "learning_rate": 2.5471755423687326e-07, + "loss": 0.02, + "num_input_tokens_seen": 14749952, + "step": 29965 + }, + { + "epoch": 3.9553913158242047, + "grad_norm": 0.01866605319082737, + "learning_rate": 2.5441046337688053e-07, + "loss": 0.0, + "num_input_tokens_seen": 14752384, + "step": 29970 + }, + { + "epoch": 3.9560512076019534, + "grad_norm": 0.0028618343640118837, + "learning_rate": 2.541035307628678e-07, + "loss": 0.0849, + "num_input_tokens_seen": 14754880, + "step": 29975 + }, + { + "epoch": 3.9567110993797017, + "grad_norm": 0.0022470192052423954, + "learning_rate": 2.5379675645997965e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14757184, + "step": 29980 + }, + { + "epoch": 3.9573709911574504, + "grad_norm": 14.184697151184082, + "learning_rate": 2.5349014053332604e-07, + "loss": 0.0546, + "num_input_tokens_seen": 14759744, + "step": 29985 + }, + { + "epoch": 3.9580308829351987, + "grad_norm": 0.010952018201351166, + "learning_rate": 2.5318368304798464e-07, + "loss": 0.0, + "num_input_tokens_seen": 14762112, + "step": 29990 + }, + { + "epoch": 3.958690774712947, + "grad_norm": 0.01866159960627556, + "learning_rate": 2.5287738406899783e-07, + "loss": 0.0, + "num_input_tokens_seen": 14764608, + "step": 29995 + }, + { + "epoch": 3.9593506664906957, + "grad_norm": 0.0018119042506441474, + "learning_rate": 2.525712436613767e-07, + "loss": 0.0015, + "num_input_tokens_seen": 14767104, + "step": 30000 + }, + { + "epoch": 3.960010558268444, + "grad_norm": 0.06305401027202606, + "learning_rate": 2.5226526189009656e-07, + "loss": 0.0012, + "num_input_tokens_seen": 14769792, + "step": 30005 + }, + { + "epoch": 3.9606704500461927, + "grad_norm": 0.1402633786201477, + "learning_rate": 2.519594388200994e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14772224, + "step": 30010 + }, + { + "epoch": 3.961330341823941, + "grad_norm": 0.0025317424442619085, + "learning_rate": 2.51653774516295e-07, + "loss": 0.0007, + "num_input_tokens_seen": 14774784, + "step": 30015 + }, + { + "epoch": 3.9619902336016892, + "grad_norm": 31.066204071044922, + "learning_rate": 2.5134826904355767e-07, + "loss": 0.0472, + "num_input_tokens_seen": 14777088, + "step": 30020 + }, + { + "epoch": 3.9626501253794375, + "grad_norm": 0.0028152584563940763, + "learning_rate": 2.510429224667291e-07, + "loss": 0.0, + "num_input_tokens_seen": 14779264, + "step": 30025 + }, + { + "epoch": 3.9633100171571862, + "grad_norm": 0.011473532766103745, + "learning_rate": 2.5073773485061645e-07, + "loss": 0.0, + "num_input_tokens_seen": 14781696, + "step": 30030 + }, + { + "epoch": 3.9639699089349345, + "grad_norm": 0.004749370273202658, + "learning_rate": 2.504327062599939e-07, + "loss": 0.0704, + "num_input_tokens_seen": 14784384, + "step": 30035 + }, + { + "epoch": 3.9646298007126832, + "grad_norm": 0.008181129582226276, + "learning_rate": 2.501278367596017e-07, + "loss": 0.0657, + "num_input_tokens_seen": 14786752, + "step": 30040 + }, + { + "epoch": 3.9652896924904315, + "grad_norm": 0.021034974604845047, + "learning_rate": 2.498231264141458e-07, + "loss": 0.0, + "num_input_tokens_seen": 14789312, + "step": 30045 + }, + { + "epoch": 3.96594958426818, + "grad_norm": 0.0029732969123870134, + "learning_rate": 2.495185752882989e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14791488, + "step": 30050 + }, + { + "epoch": 3.9666094760459285, + "grad_norm": 0.019406119361519814, + "learning_rate": 2.492141834467002e-07, + "loss": 0.0, + "num_input_tokens_seen": 14793856, + "step": 30055 + }, + { + "epoch": 3.967269367823677, + "grad_norm": 0.007835861295461655, + "learning_rate": 2.4890995095395397e-07, + "loss": 0.0518, + "num_input_tokens_seen": 14796352, + "step": 30060 + }, + { + "epoch": 3.9679292596014255, + "grad_norm": 0.0039335619658231735, + "learning_rate": 2.486058778746316e-07, + "loss": 0.0, + "num_input_tokens_seen": 14798976, + "step": 30065 + }, + { + "epoch": 3.968589151379174, + "grad_norm": 0.003976741805672646, + "learning_rate": 2.4830196427327056e-07, + "loss": 0.0518, + "num_input_tokens_seen": 14801472, + "step": 30070 + }, + { + "epoch": 3.969249043156922, + "grad_norm": 0.03188467025756836, + "learning_rate": 2.4799821021437463e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14804224, + "step": 30075 + }, + { + "epoch": 3.969908934934671, + "grad_norm": 0.013812188059091568, + "learning_rate": 2.476946157624126e-07, + "loss": 0.0, + "num_input_tokens_seen": 14806464, + "step": 30080 + }, + { + "epoch": 3.970568826712419, + "grad_norm": 0.10443263500928879, + "learning_rate": 2.4739118098182055e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14809216, + "step": 30085 + }, + { + "epoch": 3.971228718490168, + "grad_norm": 0.007619071286171675, + "learning_rate": 2.470879059370008e-07, + "loss": 0.0, + "num_input_tokens_seen": 14811392, + "step": 30090 + }, + { + "epoch": 3.971888610267916, + "grad_norm": 0.008477813564240932, + "learning_rate": 2.467847906923205e-07, + "loss": 0.0, + "num_input_tokens_seen": 14813824, + "step": 30095 + }, + { + "epoch": 3.9725485020456643, + "grad_norm": 0.0526459701359272, + "learning_rate": 2.4648183531211397e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14816000, + "step": 30100 + }, + { + "epoch": 3.973208393823413, + "grad_norm": 0.011147763580083847, + "learning_rate": 2.4617903986068146e-07, + "loss": 0.0005, + "num_input_tokens_seen": 14818368, + "step": 30105 + }, + { + "epoch": 3.9738682856011613, + "grad_norm": 0.007487526163458824, + "learning_rate": 2.458764044022892e-07, + "loss": 0.0939, + "num_input_tokens_seen": 14820544, + "step": 30110 + }, + { + "epoch": 3.97452817737891, + "grad_norm": 0.23043251037597656, + "learning_rate": 2.455739290011689e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14822912, + "step": 30115 + }, + { + "epoch": 3.9751880691566583, + "grad_norm": 0.005081063602119684, + "learning_rate": 2.452716137215191e-07, + "loss": 0.0626, + "num_input_tokens_seen": 14825152, + "step": 30120 + }, + { + "epoch": 3.9758479609344066, + "grad_norm": 0.0004440572520252317, + "learning_rate": 2.449694586275042e-07, + "loss": 0.0, + "num_input_tokens_seen": 14827776, + "step": 30125 + }, + { + "epoch": 3.9765078527121553, + "grad_norm": 0.012423052452504635, + "learning_rate": 2.4466746378325384e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14830336, + "step": 30130 + }, + { + "epoch": 3.9771677444899036, + "grad_norm": 0.012256015092134476, + "learning_rate": 2.4436562925286473e-07, + "loss": 0.0005, + "num_input_tokens_seen": 14832896, + "step": 30135 + }, + { + "epoch": 3.9778276362676523, + "grad_norm": 0.003797542303800583, + "learning_rate": 2.440639551003992e-07, + "loss": 0.0213, + "num_input_tokens_seen": 14835136, + "step": 30140 + }, + { + "epoch": 3.9784875280454006, + "grad_norm": 0.008668414317071438, + "learning_rate": 2.437624413898849e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14837440, + "step": 30145 + }, + { + "epoch": 3.979147419823149, + "grad_norm": 17.620750427246094, + "learning_rate": 2.4346108818531605e-07, + "loss": 0.0549, + "num_input_tokens_seen": 14840128, + "step": 30150 + }, + { + "epoch": 3.979807311600897, + "grad_norm": 0.28982800245285034, + "learning_rate": 2.4315989555065284e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14842560, + "step": 30155 + }, + { + "epoch": 3.980467203378646, + "grad_norm": 0.0032483511604368687, + "learning_rate": 2.428588635498215e-07, + "loss": 0.0, + "num_input_tokens_seen": 14844928, + "step": 30160 + }, + { + "epoch": 3.9811270951563946, + "grad_norm": 0.0006455311668105423, + "learning_rate": 2.425579922467137e-07, + "loss": 0.0, + "num_input_tokens_seen": 14847104, + "step": 30165 + }, + { + "epoch": 3.981786986934143, + "grad_norm": 0.0021415213122963905, + "learning_rate": 2.4225728170518636e-07, + "loss": 0.0, + "num_input_tokens_seen": 14849664, + "step": 30170 + }, + { + "epoch": 3.982446878711891, + "grad_norm": 0.004268865566700697, + "learning_rate": 2.419567319890645e-07, + "loss": 0.0518, + "num_input_tokens_seen": 14851968, + "step": 30175 + }, + { + "epoch": 3.9831067704896395, + "grad_norm": 0.0018845315789803863, + "learning_rate": 2.416563431621366e-07, + "loss": 0.0, + "num_input_tokens_seen": 14854400, + "step": 30180 + }, + { + "epoch": 3.983766662267388, + "grad_norm": 0.014987271279096603, + "learning_rate": 2.413561152881587e-07, + "loss": 0.0024, + "num_input_tokens_seen": 14857024, + "step": 30185 + }, + { + "epoch": 3.9844265540451365, + "grad_norm": 0.0005198507569730282, + "learning_rate": 2.410560484308514e-07, + "loss": 0.0, + "num_input_tokens_seen": 14859264, + "step": 30190 + }, + { + "epoch": 3.985086445822885, + "grad_norm": 0.014474891126155853, + "learning_rate": 2.407561426539019e-07, + "loss": 0.0, + "num_input_tokens_seen": 14861824, + "step": 30195 + }, + { + "epoch": 3.9857463376006335, + "grad_norm": 0.40284138917922974, + "learning_rate": 2.404563980209634e-07, + "loss": 0.0037, + "num_input_tokens_seen": 14864256, + "step": 30200 + }, + { + "epoch": 3.9864062293783817, + "grad_norm": 0.0001849651162046939, + "learning_rate": 2.401568145956537e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14866432, + "step": 30205 + }, + { + "epoch": 3.9870661211561305, + "grad_norm": 0.003026962745934725, + "learning_rate": 2.398573924415583e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14869248, + "step": 30210 + }, + { + "epoch": 3.9877260129338787, + "grad_norm": 0.0028771180659532547, + "learning_rate": 2.395581316222269e-07, + "loss": 0.0, + "num_input_tokens_seen": 14871552, + "step": 30215 + }, + { + "epoch": 3.9883859047116275, + "grad_norm": 0.029063567519187927, + "learning_rate": 2.3925903220117506e-07, + "loss": 0.0503, + "num_input_tokens_seen": 14874432, + "step": 30220 + }, + { + "epoch": 3.9890457964893757, + "grad_norm": 0.001278785872273147, + "learning_rate": 2.389600942418848e-07, + "loss": 0.0253, + "num_input_tokens_seen": 14876800, + "step": 30225 + }, + { + "epoch": 3.989705688267124, + "grad_norm": 0.006419615354388952, + "learning_rate": 2.386613178078035e-07, + "loss": 0.0, + "num_input_tokens_seen": 14879168, + "step": 30230 + }, + { + "epoch": 3.9903655800448727, + "grad_norm": 0.013536657206714153, + "learning_rate": 2.3836270296234463e-07, + "loss": 0.0, + "num_input_tokens_seen": 14881728, + "step": 30235 + }, + { + "epoch": 3.991025471822621, + "grad_norm": 0.11201123893260956, + "learning_rate": 2.3806424976888639e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14884288, + "step": 30240 + }, + { + "epoch": 3.9916853636003697, + "grad_norm": 0.009009967558085918, + "learning_rate": 2.3776595829077362e-07, + "loss": 0.0472, + "num_input_tokens_seen": 14886976, + "step": 30245 + }, + { + "epoch": 3.992345255378118, + "grad_norm": 0.8810946345329285, + "learning_rate": 2.3746782859131685e-07, + "loss": 0.0006, + "num_input_tokens_seen": 14889600, + "step": 30250 + }, + { + "epoch": 3.9930051471558663, + "grad_norm": 0.00014014226326253265, + "learning_rate": 2.371698607337913e-07, + "loss": 0.0565, + "num_input_tokens_seen": 14892224, + "step": 30255 + }, + { + "epoch": 3.993665038933615, + "grad_norm": 0.03605637326836586, + "learning_rate": 2.368720547814389e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14894592, + "step": 30260 + }, + { + "epoch": 3.9943249307113633, + "grad_norm": 0.00678131403401494, + "learning_rate": 2.3657441079746698e-07, + "loss": 0.0, + "num_input_tokens_seen": 14897344, + "step": 30265 + }, + { + "epoch": 3.994984822489112, + "grad_norm": 0.002024312736466527, + "learning_rate": 2.362769288450478e-07, + "loss": 0.0411, + "num_input_tokens_seen": 14899904, + "step": 30270 + }, + { + "epoch": 3.9956447142668603, + "grad_norm": 0.06557377427816391, + "learning_rate": 2.3597960898731995e-07, + "loss": 0.0, + "num_input_tokens_seen": 14902400, + "step": 30275 + }, + { + "epoch": 3.9963046060446086, + "grad_norm": 0.0008136624819599092, + "learning_rate": 2.356824512873876e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14904704, + "step": 30280 + }, + { + "epoch": 3.9969644978223573, + "grad_norm": 0.0027104674372822046, + "learning_rate": 2.3538545580832047e-07, + "loss": 0.0009, + "num_input_tokens_seen": 14907520, + "step": 30285 + }, + { + "epoch": 3.9976243896001056, + "grad_norm": 12.874913215637207, + "learning_rate": 2.350886226131531e-07, + "loss": 0.0414, + "num_input_tokens_seen": 14909952, + "step": 30290 + }, + { + "epoch": 3.9982842813778543, + "grad_norm": 0.008480180986225605, + "learning_rate": 2.3479195176488664e-07, + "loss": 0.0, + "num_input_tokens_seen": 14912640, + "step": 30295 + }, + { + "epoch": 3.9989441731556026, + "grad_norm": 0.0009021972655318677, + "learning_rate": 2.344954433264874e-07, + "loss": 0.0626, + "num_input_tokens_seen": 14915136, + "step": 30300 + }, + { + "epoch": 3.999604064933351, + "grad_norm": 0.0008285566582344472, + "learning_rate": 2.3419909736088672e-07, + "loss": 0.0, + "num_input_tokens_seen": 14917504, + "step": 30305 + }, + { + "epoch": 4.000263956711099, + "grad_norm": 0.003723538015037775, + "learning_rate": 2.3390291393098215e-07, + "loss": 0.0, + "num_input_tokens_seen": 14919888, + "step": 30310 + }, + { + "epoch": 4.000923848488847, + "grad_norm": 0.0029248909559100866, + "learning_rate": 2.3360689309963666e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14922640, + "step": 30315 + }, + { + "epoch": 4.001583740266597, + "grad_norm": 0.010353301651775837, + "learning_rate": 2.333110349296782e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14924944, + "step": 30320 + }, + { + "epoch": 4.001583740266597, + "eval_loss": 0.1816491037607193, + "eval_runtime": 7.8451, + "eval_samples_per_second": 858.502, + "eval_steps_per_second": 107.329, + "num_input_tokens_seen": 14924944, + "step": 30320 + }, + { + "epoch": 4.002243632044345, + "grad_norm": 0.011572030372917652, + "learning_rate": 2.3301533948390072e-07, + "loss": 0.0, + "num_input_tokens_seen": 14927632, + "step": 30325 + }, + { + "epoch": 4.002903523822093, + "grad_norm": 0.005584248807281256, + "learning_rate": 2.3271980682506297e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14930128, + "step": 30330 + }, + { + "epoch": 4.003563415599841, + "grad_norm": 0.018282150849699974, + "learning_rate": 2.3242443701589054e-07, + "loss": 0.0, + "num_input_tokens_seen": 14932688, + "step": 30335 + }, + { + "epoch": 4.00422330737759, + "grad_norm": 8.024270937312394e-05, + "learning_rate": 2.3212923011907305e-07, + "loss": 0.0002, + "num_input_tokens_seen": 14935120, + "step": 30340 + }, + { + "epoch": 4.004883199155339, + "grad_norm": 0.0024475615937262774, + "learning_rate": 2.3183418619726523e-07, + "loss": 0.0, + "num_input_tokens_seen": 14937488, + "step": 30345 + }, + { + "epoch": 4.005543090933087, + "grad_norm": 0.015102401375770569, + "learning_rate": 2.3153930531308952e-07, + "loss": 0.0176, + "num_input_tokens_seen": 14939984, + "step": 30350 + }, + { + "epoch": 4.006202982710835, + "grad_norm": 0.003354162210598588, + "learning_rate": 2.3124458752913123e-07, + "loss": 0.0, + "num_input_tokens_seen": 14942416, + "step": 30355 + }, + { + "epoch": 4.006862874488584, + "grad_norm": 1.4745862483978271, + "learning_rate": 2.3095003290794258e-07, + "loss": 0.0006, + "num_input_tokens_seen": 14944656, + "step": 30360 + }, + { + "epoch": 4.007522766266332, + "grad_norm": 2.048581518465653e-05, + "learning_rate": 2.306556415120401e-07, + "loss": 0.0018, + "num_input_tokens_seen": 14947344, + "step": 30365 + }, + { + "epoch": 4.008182658044081, + "grad_norm": 0.0013990188017487526, + "learning_rate": 2.3036141340390657e-07, + "loss": 0.0014, + "num_input_tokens_seen": 14949648, + "step": 30370 + }, + { + "epoch": 4.008842549821829, + "grad_norm": 0.1011928915977478, + "learning_rate": 2.3006734864599008e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14952464, + "step": 30375 + }, + { + "epoch": 4.009502441599578, + "grad_norm": 0.002364733023568988, + "learning_rate": 2.2977344730070314e-07, + "loss": 0.0004, + "num_input_tokens_seen": 14954896, + "step": 30380 + }, + { + "epoch": 4.010162333377326, + "grad_norm": 0.1956844925880432, + "learning_rate": 2.294797094304244e-07, + "loss": 0.0504, + "num_input_tokens_seen": 14957456, + "step": 30385 + }, + { + "epoch": 4.010822225155074, + "grad_norm": 0.00027462790603749454, + "learning_rate": 2.2918613509749795e-07, + "loss": 0.0, + "num_input_tokens_seen": 14959696, + "step": 30390 + }, + { + "epoch": 4.011482116932823, + "grad_norm": 0.02837277203798294, + "learning_rate": 2.2889272436423233e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14962192, + "step": 30395 + }, + { + "epoch": 4.012142008710572, + "grad_norm": 0.0019921245984733105, + "learning_rate": 2.2859947729290207e-07, + "loss": 0.0, + "num_input_tokens_seen": 14964432, + "step": 30400 + }, + { + "epoch": 4.01280190048832, + "grad_norm": 0.000512312282808125, + "learning_rate": 2.2830639394574657e-07, + "loss": 0.0, + "num_input_tokens_seen": 14967056, + "step": 30405 + }, + { + "epoch": 4.013461792266068, + "grad_norm": 0.0010396696161478758, + "learning_rate": 2.280134743849712e-07, + "loss": 0.0, + "num_input_tokens_seen": 14969296, + "step": 30410 + }, + { + "epoch": 4.0141216840438165, + "grad_norm": 8.928061485290527, + "learning_rate": 2.2772071867274524e-07, + "loss": 0.056, + "num_input_tokens_seen": 14971600, + "step": 30415 + }, + { + "epoch": 4.014781575821566, + "grad_norm": 0.005695475731045008, + "learning_rate": 2.2742812687120438e-07, + "loss": 0.0006, + "num_input_tokens_seen": 14973904, + "step": 30420 + }, + { + "epoch": 4.015441467599314, + "grad_norm": 0.0021254941821098328, + "learning_rate": 2.2713569904244934e-07, + "loss": 0.001, + "num_input_tokens_seen": 14976592, + "step": 30425 + }, + { + "epoch": 4.016101359377062, + "grad_norm": 0.00028210715390741825, + "learning_rate": 2.268434352485452e-07, + "loss": 0.0005, + "num_input_tokens_seen": 14979408, + "step": 30430 + }, + { + "epoch": 4.0167612511548105, + "grad_norm": 0.005418936721980572, + "learning_rate": 2.265513355515233e-07, + "loss": 0.0003, + "num_input_tokens_seen": 14981776, + "step": 30435 + }, + { + "epoch": 4.017421142932559, + "grad_norm": 0.0015027726767584682, + "learning_rate": 2.262594000133795e-07, + "loss": 0.0, + "num_input_tokens_seen": 14984208, + "step": 30440 + }, + { + "epoch": 4.018081034710307, + "grad_norm": 0.002356699900701642, + "learning_rate": 2.2596762869607521e-07, + "loss": 0.0007, + "num_input_tokens_seen": 14986704, + "step": 30445 + }, + { + "epoch": 4.018740926488056, + "grad_norm": 0.0013817485887557268, + "learning_rate": 2.2567602166153653e-07, + "loss": 0.0, + "num_input_tokens_seen": 14989328, + "step": 30450 + }, + { + "epoch": 4.0194008182658045, + "grad_norm": 0.09678442031145096, + "learning_rate": 2.2538457897165498e-07, + "loss": 0.0001, + "num_input_tokens_seen": 14991568, + "step": 30455 + }, + { + "epoch": 4.020060710043553, + "grad_norm": 0.0013658119132742286, + "learning_rate": 2.2509330068828748e-07, + "loss": 0.028, + "num_input_tokens_seen": 14993680, + "step": 30460 + }, + { + "epoch": 4.020720601821301, + "grad_norm": 0.009650514461100101, + "learning_rate": 2.2480218687325515e-07, + "loss": 0.0, + "num_input_tokens_seen": 14996048, + "step": 30465 + }, + { + "epoch": 4.021380493599049, + "grad_norm": 0.0006762424600310624, + "learning_rate": 2.2451123758834512e-07, + "loss": 0.0, + "num_input_tokens_seen": 14998544, + "step": 30470 + }, + { + "epoch": 4.0220403853767985, + "grad_norm": 142.475341796875, + "learning_rate": 2.2422045289530967e-07, + "loss": 0.0337, + "num_input_tokens_seen": 15000976, + "step": 30475 + }, + { + "epoch": 4.022700277154547, + "grad_norm": 0.04685702919960022, + "learning_rate": 2.2392983285586487e-07, + "loss": 0.0, + "num_input_tokens_seen": 15003408, + "step": 30480 + }, + { + "epoch": 4.023360168932295, + "grad_norm": 0.0030901760328561068, + "learning_rate": 2.2363937753169338e-07, + "loss": 0.0383, + "num_input_tokens_seen": 15005904, + "step": 30485 + }, + { + "epoch": 4.024020060710043, + "grad_norm": 0.0033339818473905325, + "learning_rate": 2.2334908698444188e-07, + "loss": 0.0239, + "num_input_tokens_seen": 15008400, + "step": 30490 + }, + { + "epoch": 4.024679952487792, + "grad_norm": 0.00055954564595595, + "learning_rate": 2.23058961275723e-07, + "loss": 0.0, + "num_input_tokens_seen": 15010960, + "step": 30495 + }, + { + "epoch": 4.025339844265541, + "grad_norm": 0.002326509216800332, + "learning_rate": 2.2276900046711334e-07, + "loss": 0.0, + "num_input_tokens_seen": 15013392, + "step": 30500 + }, + { + "epoch": 4.025999736043289, + "grad_norm": 0.002689636778086424, + "learning_rate": 2.2247920462015458e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15016080, + "step": 30505 + }, + { + "epoch": 4.026659627821037, + "grad_norm": 0.0010973322205245495, + "learning_rate": 2.2218957379635483e-07, + "loss": 0.0, + "num_input_tokens_seen": 15018320, + "step": 30510 + }, + { + "epoch": 4.027319519598786, + "grad_norm": 0.005248472560197115, + "learning_rate": 2.2190010805718528e-07, + "loss": 0.0, + "num_input_tokens_seen": 15021008, + "step": 30515 + }, + { + "epoch": 4.027979411376534, + "grad_norm": 0.0011204908369109035, + "learning_rate": 2.2161080746408345e-07, + "loss": 0.0, + "num_input_tokens_seen": 15023312, + "step": 30520 + }, + { + "epoch": 4.028639303154283, + "grad_norm": 0.005970706697553396, + "learning_rate": 2.2132167207845087e-07, + "loss": 0.0023, + "num_input_tokens_seen": 15025552, + "step": 30525 + }, + { + "epoch": 4.029299194932031, + "grad_norm": 0.001030957093462348, + "learning_rate": 2.2103270196165468e-07, + "loss": 0.0, + "num_input_tokens_seen": 15028176, + "step": 30530 + }, + { + "epoch": 4.02995908670978, + "grad_norm": 0.0019524479284882545, + "learning_rate": 2.2074389717502695e-07, + "loss": 0.0, + "num_input_tokens_seen": 15030416, + "step": 30535 + }, + { + "epoch": 4.030618978487528, + "grad_norm": 0.0014918609522283077, + "learning_rate": 2.204552577798635e-07, + "loss": 0.0008, + "num_input_tokens_seen": 15032720, + "step": 30540 + }, + { + "epoch": 4.031278870265276, + "grad_norm": 0.0008159163990058005, + "learning_rate": 2.2016678383742714e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15035216, + "step": 30545 + }, + { + "epoch": 4.031938762043025, + "grad_norm": 0.005126807373017073, + "learning_rate": 2.1987847540894378e-07, + "loss": 0.0, + "num_input_tokens_seen": 15037648, + "step": 30550 + }, + { + "epoch": 4.032598653820774, + "grad_norm": 0.00635765353217721, + "learning_rate": 2.1959033255560455e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15040016, + "step": 30555 + }, + { + "epoch": 4.033258545598522, + "grad_norm": 0.004286654759198427, + "learning_rate": 2.19302355338566e-07, + "loss": 0.0009, + "num_input_tokens_seen": 15042768, + "step": 30560 + }, + { + "epoch": 4.03391843737627, + "grad_norm": 0.00043845814070664346, + "learning_rate": 2.1901454381894914e-07, + "loss": 0.028, + "num_input_tokens_seen": 15045008, + "step": 30565 + }, + { + "epoch": 4.0345783291540185, + "grad_norm": 0.027832932770252228, + "learning_rate": 2.1872689805784007e-07, + "loss": 0.0, + "num_input_tokens_seen": 15047376, + "step": 30570 + }, + { + "epoch": 4.035238220931767, + "grad_norm": 0.0007483892259187996, + "learning_rate": 2.1843941811628918e-07, + "loss": 0.0008, + "num_input_tokens_seen": 15050064, + "step": 30575 + }, + { + "epoch": 4.035898112709516, + "grad_norm": 0.0030488441698253155, + "learning_rate": 2.1815210405531214e-07, + "loss": 0.0, + "num_input_tokens_seen": 15052560, + "step": 30580 + }, + { + "epoch": 4.036558004487264, + "grad_norm": 0.0026415761094540358, + "learning_rate": 2.1786495593588972e-07, + "loss": 0.0, + "num_input_tokens_seen": 15055056, + "step": 30585 + }, + { + "epoch": 4.0372178962650125, + "grad_norm": 0.015966864302754402, + "learning_rate": 2.1757797381896625e-07, + "loss": 0.0, + "num_input_tokens_seen": 15057168, + "step": 30590 + }, + { + "epoch": 4.037877788042761, + "grad_norm": 0.020574091002345085, + "learning_rate": 2.1729115776545192e-07, + "loss": 0.0, + "num_input_tokens_seen": 15059408, + "step": 30595 + }, + { + "epoch": 4.038537679820509, + "grad_norm": 0.004963522776961327, + "learning_rate": 2.170045078362218e-07, + "loss": 0.0, + "num_input_tokens_seen": 15062032, + "step": 30600 + }, + { + "epoch": 4.039197571598258, + "grad_norm": 1.6431316137313843, + "learning_rate": 2.167180240921145e-07, + "loss": 0.001, + "num_input_tokens_seen": 15064528, + "step": 30605 + }, + { + "epoch": 4.0398574633760065, + "grad_norm": 0.004193543456494808, + "learning_rate": 2.1643170659393461e-07, + "loss": 0.0, + "num_input_tokens_seen": 15066704, + "step": 30610 + }, + { + "epoch": 4.040517355153755, + "grad_norm": 0.00035888998536393046, + "learning_rate": 2.1614555540245083e-07, + "loss": 0.0014, + "num_input_tokens_seen": 15069200, + "step": 30615 + }, + { + "epoch": 4.041177246931503, + "grad_norm": 0.009292280301451683, + "learning_rate": 2.1585957057839688e-07, + "loss": 0.0, + "num_input_tokens_seen": 15071440, + "step": 30620 + }, + { + "epoch": 4.041837138709251, + "grad_norm": 0.0003106459917034954, + "learning_rate": 2.1557375218247053e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15074192, + "step": 30625 + }, + { + "epoch": 4.0424970304870005, + "grad_norm": 0.0001242978178197518, + "learning_rate": 2.1528810027533495e-07, + "loss": 0.0, + "num_input_tokens_seen": 15076624, + "step": 30630 + }, + { + "epoch": 4.043156922264749, + "grad_norm": 0.001095140934921801, + "learning_rate": 2.1500261491761796e-07, + "loss": 0.0, + "num_input_tokens_seen": 15079248, + "step": 30635 + }, + { + "epoch": 4.043816814042497, + "grad_norm": 9.301899262936786e-05, + "learning_rate": 2.1471729616991107e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15081488, + "step": 30640 + }, + { + "epoch": 4.044476705820245, + "grad_norm": 0.0002437293151160702, + "learning_rate": 2.1443214409277154e-07, + "loss": 0.0, + "num_input_tokens_seen": 15083856, + "step": 30645 + }, + { + "epoch": 4.045136597597994, + "grad_norm": 0.6968335509300232, + "learning_rate": 2.1414715874672117e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15086160, + "step": 30650 + }, + { + "epoch": 4.045796489375743, + "grad_norm": 0.004181780386716127, + "learning_rate": 2.1386234019224525e-07, + "loss": 0.0, + "num_input_tokens_seen": 15088336, + "step": 30655 + }, + { + "epoch": 4.046456381153491, + "grad_norm": 0.0003220188373234123, + "learning_rate": 2.1357768848979518e-07, + "loss": 0.0, + "num_input_tokens_seen": 15090832, + "step": 30660 + }, + { + "epoch": 4.047116272931239, + "grad_norm": 0.054620057344436646, + "learning_rate": 2.1329320369978532e-07, + "loss": 0.0162, + "num_input_tokens_seen": 15093392, + "step": 30665 + }, + { + "epoch": 4.047776164708988, + "grad_norm": 0.009810811839997768, + "learning_rate": 2.130088858825967e-07, + "loss": 0.0, + "num_input_tokens_seen": 15096144, + "step": 30670 + }, + { + "epoch": 4.048436056486736, + "grad_norm": 0.0014325518859550357, + "learning_rate": 2.1272473509857313e-07, + "loss": 0.0028, + "num_input_tokens_seen": 15098512, + "step": 30675 + }, + { + "epoch": 4.049095948264485, + "grad_norm": 0.013514799997210503, + "learning_rate": 2.1244075140802298e-07, + "loss": 0.0188, + "num_input_tokens_seen": 15101008, + "step": 30680 + }, + { + "epoch": 4.049755840042233, + "grad_norm": 0.0010740907164290547, + "learning_rate": 2.1215693487122078e-07, + "loss": 0.0352, + "num_input_tokens_seen": 15103632, + "step": 30685 + }, + { + "epoch": 4.050415731819982, + "grad_norm": 0.011525586247444153, + "learning_rate": 2.118732855484038e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15106128, + "step": 30690 + }, + { + "epoch": 4.05107562359773, + "grad_norm": 4.342807369539514e-05, + "learning_rate": 2.1158980349977496e-07, + "loss": 0.0564, + "num_input_tokens_seen": 15108496, + "step": 30695 + }, + { + "epoch": 4.051735515375478, + "grad_norm": 0.00133869843557477, + "learning_rate": 2.1130648878550095e-07, + "loss": 0.0, + "num_input_tokens_seen": 15111184, + "step": 30700 + }, + { + "epoch": 4.052395407153226, + "grad_norm": 0.00475813914090395, + "learning_rate": 2.1102334146571342e-07, + "loss": 0.0468, + "num_input_tokens_seen": 15114000, + "step": 30705 + }, + { + "epoch": 4.053055298930976, + "grad_norm": 0.0002620435261633247, + "learning_rate": 2.1074036160050867e-07, + "loss": 0.0, + "num_input_tokens_seen": 15116240, + "step": 30710 + }, + { + "epoch": 4.053715190708724, + "grad_norm": 0.002503307070583105, + "learning_rate": 2.104575492499464e-07, + "loss": 0.0352, + "num_input_tokens_seen": 15118864, + "step": 30715 + }, + { + "epoch": 4.054375082486472, + "grad_norm": 0.001919831382110715, + "learning_rate": 2.1017490447405195e-07, + "loss": 0.0, + "num_input_tokens_seen": 15121552, + "step": 30720 + }, + { + "epoch": 4.05503497426422, + "grad_norm": 0.005105135962367058, + "learning_rate": 2.0989242733281486e-07, + "loss": 0.0, + "num_input_tokens_seen": 15123792, + "step": 30725 + }, + { + "epoch": 4.055694866041969, + "grad_norm": 0.012683448381721973, + "learning_rate": 2.0961011788618833e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15126224, + "step": 30730 + }, + { + "epoch": 4.056354757819718, + "grad_norm": 0.023624008521437645, + "learning_rate": 2.0932797619409058e-07, + "loss": 0.0, + "num_input_tokens_seen": 15128912, + "step": 30735 + }, + { + "epoch": 4.057014649597466, + "grad_norm": 0.002604448702186346, + "learning_rate": 2.0904600231640435e-07, + "loss": 0.0, + "num_input_tokens_seen": 15131472, + "step": 30740 + }, + { + "epoch": 4.057674541375214, + "grad_norm": 0.004134779330343008, + "learning_rate": 2.0876419631297682e-07, + "loss": 0.0, + "num_input_tokens_seen": 15133776, + "step": 30745 + }, + { + "epoch": 4.058334433152963, + "grad_norm": 0.00834393035620451, + "learning_rate": 2.084825582436186e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15136400, + "step": 30750 + }, + { + "epoch": 4.058994324930711, + "grad_norm": 0.00036648480454459786, + "learning_rate": 2.0820108816810565e-07, + "loss": 0.0, + "num_input_tokens_seen": 15138832, + "step": 30755 + }, + { + "epoch": 4.05965421670846, + "grad_norm": 0.00014605256728827953, + "learning_rate": 2.0791978614617834e-07, + "loss": 0.0, + "num_input_tokens_seen": 15141520, + "step": 30760 + }, + { + "epoch": 4.060314108486208, + "grad_norm": 0.00479362765327096, + "learning_rate": 2.0763865223754028e-07, + "loss": 0.0, + "num_input_tokens_seen": 15143760, + "step": 30765 + }, + { + "epoch": 4.060974000263957, + "grad_norm": 0.01058896817266941, + "learning_rate": 2.0735768650186058e-07, + "loss": 0.0, + "num_input_tokens_seen": 15146128, + "step": 30770 + }, + { + "epoch": 4.061633892041705, + "grad_norm": 0.046509820967912674, + "learning_rate": 2.0707688899877195e-07, + "loss": 0.0032, + "num_input_tokens_seen": 15148752, + "step": 30775 + }, + { + "epoch": 4.062293783819453, + "grad_norm": 0.003307884559035301, + "learning_rate": 2.0679625978787196e-07, + "loss": 0.0164, + "num_input_tokens_seen": 15150928, + "step": 30780 + }, + { + "epoch": 4.062953675597202, + "grad_norm": 0.00022394787811208516, + "learning_rate": 2.0651579892872173e-07, + "loss": 0.0, + "num_input_tokens_seen": 15153424, + "step": 30785 + }, + { + "epoch": 4.063613567374951, + "grad_norm": 0.0006549333338625729, + "learning_rate": 2.0623550648084719e-07, + "loss": 0.0, + "num_input_tokens_seen": 15156112, + "step": 30790 + }, + { + "epoch": 4.064273459152699, + "grad_norm": 0.0018182151252403855, + "learning_rate": 2.0595538250373868e-07, + "loss": 0.0, + "num_input_tokens_seen": 15158608, + "step": 30795 + }, + { + "epoch": 4.064933350930447, + "grad_norm": 0.04654252901673317, + "learning_rate": 2.0567542705684992e-07, + "loss": 0.0, + "num_input_tokens_seen": 15161040, + "step": 30800 + }, + { + "epoch": 4.0655932427081956, + "grad_norm": 0.0013130871811881661, + "learning_rate": 2.0539564019959965e-07, + "loss": 0.0, + "num_input_tokens_seen": 15163792, + "step": 30805 + }, + { + "epoch": 4.066253134485945, + "grad_norm": 0.0019590912852436304, + "learning_rate": 2.05116021991371e-07, + "loss": 0.0, + "num_input_tokens_seen": 15166352, + "step": 30810 + }, + { + "epoch": 4.066913026263693, + "grad_norm": 0.00028893371927551925, + "learning_rate": 2.0483657249151043e-07, + "loss": 0.0, + "num_input_tokens_seen": 15168592, + "step": 30815 + }, + { + "epoch": 4.067572918041441, + "grad_norm": 0.3325551152229309, + "learning_rate": 2.045572917593291e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15171344, + "step": 30820 + }, + { + "epoch": 4.0682328098191896, + "grad_norm": 0.007113211788237095, + "learning_rate": 2.0427817985410245e-07, + "loss": 0.0, + "num_input_tokens_seen": 15173776, + "step": 30825 + }, + { + "epoch": 4.068892701596938, + "grad_norm": 0.008288971148431301, + "learning_rate": 2.0399923683507026e-07, + "loss": 0.0007, + "num_input_tokens_seen": 15176208, + "step": 30830 + }, + { + "epoch": 4.069552593374686, + "grad_norm": 0.010359534062445164, + "learning_rate": 2.0372046276143596e-07, + "loss": 0.0, + "num_input_tokens_seen": 15178576, + "step": 30835 + }, + { + "epoch": 4.070212485152435, + "grad_norm": 0.0014906317228451371, + "learning_rate": 2.0344185769236654e-07, + "loss": 0.0, + "num_input_tokens_seen": 15180752, + "step": 30840 + }, + { + "epoch": 4.070872376930184, + "grad_norm": 0.0037701940163969994, + "learning_rate": 2.0316342168699517e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15183248, + "step": 30845 + }, + { + "epoch": 4.071532268707932, + "grad_norm": 0.00434601865708828, + "learning_rate": 2.0288515480441714e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15185936, + "step": 30850 + }, + { + "epoch": 4.07219216048568, + "grad_norm": 0.0018540917662903666, + "learning_rate": 2.0260705710369296e-07, + "loss": 0.061, + "num_input_tokens_seen": 15188176, + "step": 30855 + }, + { + "epoch": 4.072852052263428, + "grad_norm": 0.006436683237552643, + "learning_rate": 2.0232912864384644e-07, + "loss": 0.0, + "num_input_tokens_seen": 15190416, + "step": 30860 + }, + { + "epoch": 4.073511944041178, + "grad_norm": 0.0005059536779299378, + "learning_rate": 2.0205136948386604e-07, + "loss": 0.0003, + "num_input_tokens_seen": 15192848, + "step": 30865 + }, + { + "epoch": 4.074171835818926, + "grad_norm": 0.018882203847169876, + "learning_rate": 2.0177377968270438e-07, + "loss": 0.0, + "num_input_tokens_seen": 15195728, + "step": 30870 + }, + { + "epoch": 4.074831727596674, + "grad_norm": 0.0033944041933864355, + "learning_rate": 2.0149635929927723e-07, + "loss": 0.0, + "num_input_tokens_seen": 15198416, + "step": 30875 + }, + { + "epoch": 4.075491619374422, + "grad_norm": 0.0015481224982067943, + "learning_rate": 2.0121910839246593e-07, + "loss": 0.0, + "num_input_tokens_seen": 15200912, + "step": 30880 + }, + { + "epoch": 4.076151511152171, + "grad_norm": 0.005322051700204611, + "learning_rate": 2.0094202702111462e-07, + "loss": 0.0, + "num_input_tokens_seen": 15203280, + "step": 30885 + }, + { + "epoch": 4.07681140292992, + "grad_norm": 0.003729539690539241, + "learning_rate": 2.006651152440315e-07, + "loss": 0.0, + "num_input_tokens_seen": 15205840, + "step": 30890 + }, + { + "epoch": 4.077471294707668, + "grad_norm": 0.0005679655005224049, + "learning_rate": 2.0038837311998945e-07, + "loss": 0.0, + "num_input_tokens_seen": 15208208, + "step": 30895 + }, + { + "epoch": 4.078131186485416, + "grad_norm": 7.510792784160003e-05, + "learning_rate": 2.0011180070772472e-07, + "loss": 0.0, + "num_input_tokens_seen": 15210576, + "step": 30900 + }, + { + "epoch": 4.078791078263165, + "grad_norm": 0.022109191864728928, + "learning_rate": 1.998353980659383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15213072, + "step": 30905 + }, + { + "epoch": 4.079450970040913, + "grad_norm": 0.0009357878006994724, + "learning_rate": 1.9955916525329396e-07, + "loss": 0.0, + "num_input_tokens_seen": 15215504, + "step": 30910 + }, + { + "epoch": 4.080110861818662, + "grad_norm": 0.00043642695527523756, + "learning_rate": 1.992831023284205e-07, + "loss": 0.0013, + "num_input_tokens_seen": 15217680, + "step": 30915 + }, + { + "epoch": 4.08077075359641, + "grad_norm": 0.0005060906405560672, + "learning_rate": 1.9900720934991055e-07, + "loss": 0.0, + "num_input_tokens_seen": 15220176, + "step": 30920 + }, + { + "epoch": 4.081430645374159, + "grad_norm": 0.002591827418655157, + "learning_rate": 1.9873148637631977e-07, + "loss": 0.0, + "num_input_tokens_seen": 15222608, + "step": 30925 + }, + { + "epoch": 4.082090537151907, + "grad_norm": 2.7808291633846238e-05, + "learning_rate": 1.9845593346616861e-07, + "loss": 0.13, + "num_input_tokens_seen": 15224912, + "step": 30930 + }, + { + "epoch": 4.082750428929655, + "grad_norm": 7.892550638644025e-05, + "learning_rate": 1.981805506779416e-07, + "loss": 0.0, + "num_input_tokens_seen": 15227280, + "step": 30935 + }, + { + "epoch": 4.083410320707404, + "grad_norm": 0.00016154882905539125, + "learning_rate": 1.9790533807008613e-07, + "loss": 0.0, + "num_input_tokens_seen": 15229520, + "step": 30940 + }, + { + "epoch": 4.084070212485153, + "grad_norm": 0.000336196506395936, + "learning_rate": 1.976302957010143e-07, + "loss": 0.0, + "num_input_tokens_seen": 15232016, + "step": 30945 + }, + { + "epoch": 4.084730104262901, + "grad_norm": 0.015776529908180237, + "learning_rate": 1.9735542362910197e-07, + "loss": 0.0188, + "num_input_tokens_seen": 15234320, + "step": 30950 + }, + { + "epoch": 4.085389996040649, + "grad_norm": 11.258391380310059, + "learning_rate": 1.9708072191268886e-07, + "loss": 0.0998, + "num_input_tokens_seen": 15236752, + "step": 30955 + }, + { + "epoch": 4.0860498878183975, + "grad_norm": 3.8240083085838705e-05, + "learning_rate": 1.9680619061007796e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15239248, + "step": 30960 + }, + { + "epoch": 4.086709779596147, + "grad_norm": 0.0016334295505657792, + "learning_rate": 1.9653182977953699e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15241680, + "step": 30965 + }, + { + "epoch": 4.087369671373895, + "grad_norm": 0.00161271751858294, + "learning_rate": 1.9625763947929698e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15244176, + "step": 30970 + }, + { + "epoch": 4.088029563151643, + "grad_norm": 0.0008376072510145605, + "learning_rate": 1.9598361976755252e-07, + "loss": 0.0, + "num_input_tokens_seen": 15246416, + "step": 30975 + }, + { + "epoch": 4.0886894549293915, + "grad_norm": 0.000364994426490739, + "learning_rate": 1.9570977070246254e-07, + "loss": 0.0, + "num_input_tokens_seen": 15248656, + "step": 30980 + }, + { + "epoch": 4.08934934670714, + "grad_norm": 9.574399948120117, + "learning_rate": 1.9543609234214987e-07, + "loss": 0.0066, + "num_input_tokens_seen": 15251216, + "step": 30985 + }, + { + "epoch": 4.090009238484888, + "grad_norm": 0.0003706459829118103, + "learning_rate": 1.9516258474470005e-07, + "loss": 0.0, + "num_input_tokens_seen": 15253840, + "step": 30990 + }, + { + "epoch": 4.090669130262637, + "grad_norm": 0.0002485027362126857, + "learning_rate": 1.948892479681634e-07, + "loss": 0.0, + "num_input_tokens_seen": 15256400, + "step": 30995 + }, + { + "epoch": 4.0913290220403855, + "grad_norm": 0.0002536515239626169, + "learning_rate": 1.946160820705538e-07, + "loss": 0.0, + "num_input_tokens_seen": 15258640, + "step": 31000 + }, + { + "epoch": 4.091988913818134, + "grad_norm": 0.00037458192673511803, + "learning_rate": 1.9434308710984893e-07, + "loss": 0.0176, + "num_input_tokens_seen": 15261264, + "step": 31005 + }, + { + "epoch": 4.092648805595882, + "grad_norm": 0.0009328834130428731, + "learning_rate": 1.9407026314398966e-07, + "loss": 0.0, + "num_input_tokens_seen": 15263696, + "step": 31010 + }, + { + "epoch": 4.09330869737363, + "grad_norm": 0.0010519040515646338, + "learning_rate": 1.9379761023088047e-07, + "loss": 0.0066, + "num_input_tokens_seen": 15266256, + "step": 31015 + }, + { + "epoch": 4.0939685891513795, + "grad_norm": 0.021239787340164185, + "learning_rate": 1.9352512842839096e-07, + "loss": 0.0, + "num_input_tokens_seen": 15268816, + "step": 31020 + }, + { + "epoch": 4.094628480929128, + "grad_norm": 0.0001497942430432886, + "learning_rate": 1.9325281779435265e-07, + "loss": 0.0322, + "num_input_tokens_seen": 15271248, + "step": 31025 + }, + { + "epoch": 4.095288372706876, + "grad_norm": 0.013604072853922844, + "learning_rate": 1.9298067838656196e-07, + "loss": 0.0, + "num_input_tokens_seen": 15273936, + "step": 31030 + }, + { + "epoch": 4.095948264484624, + "grad_norm": 0.00011796157923527062, + "learning_rate": 1.9270871026277812e-07, + "loss": 0.0, + "num_input_tokens_seen": 15276560, + "step": 31035 + }, + { + "epoch": 4.096608156262373, + "grad_norm": 0.0051427981816232204, + "learning_rate": 1.9243691348072454e-07, + "loss": 0.0, + "num_input_tokens_seen": 15279184, + "step": 31040 + }, + { + "epoch": 4.097268048040122, + "grad_norm": 0.011375799775123596, + "learning_rate": 1.9216528809808841e-07, + "loss": 0.0, + "num_input_tokens_seen": 15281424, + "step": 31045 + }, + { + "epoch": 4.09792793981787, + "grad_norm": 0.005654303357005119, + "learning_rate": 1.918938341725198e-07, + "loss": 0.0, + "num_input_tokens_seen": 15283984, + "step": 31050 + }, + { + "epoch": 4.098587831595618, + "grad_norm": 0.00010724661115091294, + "learning_rate": 1.91622551761633e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15286544, + "step": 31055 + }, + { + "epoch": 4.099247723373367, + "grad_norm": 0.0006716445786878467, + "learning_rate": 1.9135144092300604e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15289040, + "step": 31060 + }, + { + "epoch": 4.099907615151115, + "grad_norm": 9.40972167882137e-05, + "learning_rate": 1.9108050171417967e-07, + "loss": 0.0, + "num_input_tokens_seen": 15291728, + "step": 31065 + }, + { + "epoch": 4.100567506928864, + "grad_norm": 0.0023098858073353767, + "learning_rate": 1.9080973419265922e-07, + "loss": 0.0, + "num_input_tokens_seen": 15294160, + "step": 31070 + }, + { + "epoch": 4.101227398706612, + "grad_norm": 0.00010492518777027726, + "learning_rate": 1.9053913841591285e-07, + "loss": 0.0095, + "num_input_tokens_seen": 15296528, + "step": 31075 + }, + { + "epoch": 4.101887290484361, + "grad_norm": 0.00028661335818469524, + "learning_rate": 1.9026871444137306e-07, + "loss": 0.0, + "num_input_tokens_seen": 15298896, + "step": 31080 + }, + { + "epoch": 4.102547182262109, + "grad_norm": 0.009167580865323544, + "learning_rate": 1.8999846232643468e-07, + "loss": 0.0, + "num_input_tokens_seen": 15301584, + "step": 31085 + }, + { + "epoch": 4.103207074039857, + "grad_norm": 0.04716065526008606, + "learning_rate": 1.897283821284571e-07, + "loss": 0.0, + "num_input_tokens_seen": 15304208, + "step": 31090 + }, + { + "epoch": 4.103866965817606, + "grad_norm": 8.909520149230957, + "learning_rate": 1.894584739047631e-07, + "loss": 0.0226, + "num_input_tokens_seen": 15306768, + "step": 31095 + }, + { + "epoch": 4.104526857595355, + "grad_norm": 0.0014904022682458162, + "learning_rate": 1.8918873771263842e-07, + "loss": 0.0, + "num_input_tokens_seen": 15309200, + "step": 31100 + }, + { + "epoch": 4.105186749373103, + "grad_norm": 0.0003525837091729045, + "learning_rate": 1.8891917360933262e-07, + "loss": 0.0, + "num_input_tokens_seen": 15311632, + "step": 31105 + }, + { + "epoch": 4.105846641150851, + "grad_norm": 0.00023026631970424205, + "learning_rate": 1.8864978165205892e-07, + "loss": 0.0, + "num_input_tokens_seen": 15313936, + "step": 31110 + }, + { + "epoch": 4.1065065329285995, + "grad_norm": 0.00022328045452013612, + "learning_rate": 1.8838056189799388e-07, + "loss": 0.0, + "num_input_tokens_seen": 15316368, + "step": 31115 + }, + { + "epoch": 4.107166424706348, + "grad_norm": 7.168061711126938e-05, + "learning_rate": 1.881115144042771e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15318736, + "step": 31120 + }, + { + "epoch": 4.107826316484097, + "grad_norm": 0.09140996634960175, + "learning_rate": 1.8784263922801212e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15321360, + "step": 31125 + }, + { + "epoch": 4.108486208261845, + "grad_norm": 0.0032238177955150604, + "learning_rate": 1.8757393642626606e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15323664, + "step": 31130 + }, + { + "epoch": 4.1091461000395935, + "grad_norm": 0.001386750489473343, + "learning_rate": 1.873054060560686e-07, + "loss": 0.0, + "num_input_tokens_seen": 15325904, + "step": 31135 + }, + { + "epoch": 4.109805991817342, + "grad_norm": 0.05405682697892189, + "learning_rate": 1.870370481744137e-07, + "loss": 0.0, + "num_input_tokens_seen": 15328208, + "step": 31140 + }, + { + "epoch": 4.11046588359509, + "grad_norm": 0.000502249866258353, + "learning_rate": 1.8676886283825843e-07, + "loss": 0.0, + "num_input_tokens_seen": 15330704, + "step": 31145 + }, + { + "epoch": 4.111125775372839, + "grad_norm": 0.01588205061852932, + "learning_rate": 1.8650085010452288e-07, + "loss": 0.0, + "num_input_tokens_seen": 15333072, + "step": 31150 + }, + { + "epoch": 4.1117856671505875, + "grad_norm": 0.0001183047570521012, + "learning_rate": 1.8623301003009106e-07, + "loss": 0.0011, + "num_input_tokens_seen": 15335440, + "step": 31155 + }, + { + "epoch": 4.112445558928336, + "grad_norm": 0.00015695270849391818, + "learning_rate": 1.8596534267180998e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15338320, + "step": 31160 + }, + { + "epoch": 4.113105450706084, + "grad_norm": 0.0009862695587798953, + "learning_rate": 1.8569784808649035e-07, + "loss": 0.0, + "num_input_tokens_seen": 15341072, + "step": 31165 + }, + { + "epoch": 4.113765342483832, + "grad_norm": 0.0006455021211877465, + "learning_rate": 1.8543052633090582e-07, + "loss": 0.0294, + "num_input_tokens_seen": 15343504, + "step": 31170 + }, + { + "epoch": 4.1144252342615815, + "grad_norm": 0.006330928765237331, + "learning_rate": 1.8516337746179288e-07, + "loss": 0.0266, + "num_input_tokens_seen": 15346128, + "step": 31175 + }, + { + "epoch": 4.11508512603933, + "grad_norm": 7.823634223314002e-05, + "learning_rate": 1.8489640153585296e-07, + "loss": 0.0, + "num_input_tokens_seen": 15348752, + "step": 31180 + }, + { + "epoch": 4.115745017817078, + "grad_norm": 0.0016246692975983024, + "learning_rate": 1.8462959860974914e-07, + "loss": 0.0, + "num_input_tokens_seen": 15350992, + "step": 31185 + }, + { + "epoch": 4.116404909594826, + "grad_norm": 0.0002753250009845942, + "learning_rate": 1.843629687401085e-07, + "loss": 0.0, + "num_input_tokens_seen": 15353360, + "step": 31190 + }, + { + "epoch": 4.117064801372575, + "grad_norm": 0.0006193838198669255, + "learning_rate": 1.840965119835216e-07, + "loss": 0.0, + "num_input_tokens_seen": 15355856, + "step": 31195 + }, + { + "epoch": 4.117724693150324, + "grad_norm": 3.5579931136453524e-05, + "learning_rate": 1.838302283965415e-07, + "loss": 0.0, + "num_input_tokens_seen": 15358288, + "step": 31200 + }, + { + "epoch": 4.118384584928072, + "grad_norm": 0.0013902924256399274, + "learning_rate": 1.835641180356855e-07, + "loss": 0.0, + "num_input_tokens_seen": 15360592, + "step": 31205 + }, + { + "epoch": 4.11904447670582, + "grad_norm": 0.0001232646027347073, + "learning_rate": 1.8329818095743265e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15362896, + "step": 31210 + }, + { + "epoch": 4.119704368483569, + "grad_norm": 0.00045455177314579487, + "learning_rate": 1.8303241721822737e-07, + "loss": 0.0, + "num_input_tokens_seen": 15365328, + "step": 31215 + }, + { + "epoch": 4.120364260261317, + "grad_norm": 17.56751823425293, + "learning_rate": 1.8276682687447553e-07, + "loss": 0.0426, + "num_input_tokens_seen": 15367632, + "step": 31220 + }, + { + "epoch": 4.121024152039066, + "grad_norm": 0.0020516146905720234, + "learning_rate": 1.825014099825466e-07, + "loss": 0.0, + "num_input_tokens_seen": 15370128, + "step": 31225 + }, + { + "epoch": 4.121684043816814, + "grad_norm": 0.0730593279004097, + "learning_rate": 1.822361665987734e-07, + "loss": 0.0, + "num_input_tokens_seen": 15372688, + "step": 31230 + }, + { + "epoch": 4.122343935594563, + "grad_norm": 0.010133378207683563, + "learning_rate": 1.819710967794521e-07, + "loss": 0.0, + "num_input_tokens_seen": 15375056, + "step": 31235 + }, + { + "epoch": 4.123003827372311, + "grad_norm": 0.001488432171754539, + "learning_rate": 1.8170620058084208e-07, + "loss": 0.0, + "num_input_tokens_seen": 15377552, + "step": 31240 + }, + { + "epoch": 4.123663719150059, + "grad_norm": 5.022612094762735e-05, + "learning_rate": 1.814414780591651e-07, + "loss": 0.0, + "num_input_tokens_seen": 15379920, + "step": 31245 + }, + { + "epoch": 4.124323610927807, + "grad_norm": 0.0005323368241079152, + "learning_rate": 1.811769292706068e-07, + "loss": 0.0, + "num_input_tokens_seen": 15382224, + "step": 31250 + }, + { + "epoch": 4.124983502705557, + "grad_norm": 0.0006102940533310175, + "learning_rate": 1.8091255427131614e-07, + "loss": 0.0, + "num_input_tokens_seen": 15384912, + "step": 31255 + }, + { + "epoch": 4.125643394483305, + "grad_norm": 0.00011270979302935302, + "learning_rate": 1.8064835311740422e-07, + "loss": 0.0, + "num_input_tokens_seen": 15387216, + "step": 31260 + }, + { + "epoch": 4.126303286261053, + "grad_norm": 0.0010503892553970218, + "learning_rate": 1.80384325864946e-07, + "loss": 0.0035, + "num_input_tokens_seen": 15389648, + "step": 31265 + }, + { + "epoch": 4.126963178038801, + "grad_norm": 0.013230645097792149, + "learning_rate": 1.8012047256997977e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15392272, + "step": 31270 + }, + { + "epoch": 4.12762306981655, + "grad_norm": 0.00023171912471298128, + "learning_rate": 1.798567932885059e-07, + "loss": 0.0, + "num_input_tokens_seen": 15394896, + "step": 31275 + }, + { + "epoch": 4.128282961594299, + "grad_norm": 0.1003655418753624, + "learning_rate": 1.7959328807648856e-07, + "loss": 0.0343, + "num_input_tokens_seen": 15397584, + "step": 31280 + }, + { + "epoch": 4.128942853372047, + "grad_norm": 0.5334244966506958, + "learning_rate": 1.7932995698985486e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15400144, + "step": 31285 + }, + { + "epoch": 4.129602745149795, + "grad_norm": 0.00014007413119543344, + "learning_rate": 1.7906680008449536e-07, + "loss": 0.0, + "num_input_tokens_seen": 15402832, + "step": 31290 + }, + { + "epoch": 4.130262636927544, + "grad_norm": 0.004108428489416838, + "learning_rate": 1.788038174162625e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15405328, + "step": 31295 + }, + { + "epoch": 4.130922528705292, + "grad_norm": 0.000142919976497069, + "learning_rate": 1.785410090409727e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15407952, + "step": 31300 + }, + { + "epoch": 4.131582420483041, + "grad_norm": 0.0007745701004751027, + "learning_rate": 1.7827837501440556e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15410320, + "step": 31305 + }, + { + "epoch": 4.132242312260789, + "grad_norm": 0.004103151150047779, + "learning_rate": 1.7801591539230255e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15412688, + "step": 31310 + }, + { + "epoch": 4.132902204038538, + "grad_norm": 13.383068084716797, + "learning_rate": 1.7775363023036916e-07, + "loss": 0.0338, + "num_input_tokens_seen": 15415056, + "step": 31315 + }, + { + "epoch": 4.133562095816286, + "grad_norm": 0.0009205325040966272, + "learning_rate": 1.7749151958427379e-07, + "loss": 0.0, + "num_input_tokens_seen": 15417488, + "step": 31320 + }, + { + "epoch": 4.134221987594034, + "grad_norm": 0.00014656288840342313, + "learning_rate": 1.77229583509647e-07, + "loss": 0.0, + "num_input_tokens_seen": 15419792, + "step": 31325 + }, + { + "epoch": 4.134881879371783, + "grad_norm": 7.536137127317488e-05, + "learning_rate": 1.7696782206208306e-07, + "loss": 0.0, + "num_input_tokens_seen": 15422480, + "step": 31330 + }, + { + "epoch": 4.135541771149532, + "grad_norm": 0.15476743876934052, + "learning_rate": 1.767062352971389e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15424784, + "step": 31335 + }, + { + "epoch": 4.13620166292728, + "grad_norm": 0.0006393285002559423, + "learning_rate": 1.7644482327033484e-07, + "loss": 0.0, + "num_input_tokens_seen": 15427344, + "step": 31340 + }, + { + "epoch": 4.136861554705028, + "grad_norm": 0.03930385038256645, + "learning_rate": 1.761835860371532e-07, + "loss": 0.0, + "num_input_tokens_seen": 15430096, + "step": 31345 + }, + { + "epoch": 4.1375214464827765, + "grad_norm": 0.03138414025306702, + "learning_rate": 1.759225236530394e-07, + "loss": 0.0, + "num_input_tokens_seen": 15432784, + "step": 31350 + }, + { + "epoch": 4.138181338260526, + "grad_norm": 9.463958849664778e-05, + "learning_rate": 1.756616361734029e-07, + "loss": 0.0, + "num_input_tokens_seen": 15434832, + "step": 31355 + }, + { + "epoch": 4.138841230038274, + "grad_norm": 2.535146474838257, + "learning_rate": 1.754009236536146e-07, + "loss": 0.0205, + "num_input_tokens_seen": 15437264, + "step": 31360 + }, + { + "epoch": 4.139501121816022, + "grad_norm": 0.0004646007146220654, + "learning_rate": 1.7514038614900905e-07, + "loss": 0.0, + "num_input_tokens_seen": 15439952, + "step": 31365 + }, + { + "epoch": 4.1401610135937705, + "grad_norm": 0.00032446475233882666, + "learning_rate": 1.748800237148833e-07, + "loss": 0.0, + "num_input_tokens_seen": 15442192, + "step": 31370 + }, + { + "epoch": 4.140820905371519, + "grad_norm": 0.00011745891242753714, + "learning_rate": 1.7461983640649736e-07, + "loss": 0.0, + "num_input_tokens_seen": 15444560, + "step": 31375 + }, + { + "epoch": 4.141480797149267, + "grad_norm": 0.06134350597858429, + "learning_rate": 1.7435982427907446e-07, + "loss": 0.0, + "num_input_tokens_seen": 15447056, + "step": 31380 + }, + { + "epoch": 4.142140688927016, + "grad_norm": 0.005177411716431379, + "learning_rate": 1.7409998738779962e-07, + "loss": 0.0, + "num_input_tokens_seen": 15449680, + "step": 31385 + }, + { + "epoch": 4.1428005807047645, + "grad_norm": 0.00234969868324697, + "learning_rate": 1.7384032578782216e-07, + "loss": 0.0, + "num_input_tokens_seen": 15452048, + "step": 31390 + }, + { + "epoch": 4.143460472482513, + "grad_norm": 0.0019324537133798003, + "learning_rate": 1.7358083953425306e-07, + "loss": 0.0, + "num_input_tokens_seen": 15454736, + "step": 31395 + }, + { + "epoch": 4.144120364260261, + "grad_norm": 0.00011362414807081223, + "learning_rate": 1.7332152868216598e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15457232, + "step": 31400 + }, + { + "epoch": 4.144780256038009, + "grad_norm": 0.00027947762282565236, + "learning_rate": 1.7306239328659822e-07, + "loss": 0.0, + "num_input_tokens_seen": 15459728, + "step": 31405 + }, + { + "epoch": 4.1454401478157585, + "grad_norm": 0.0011989494087174535, + "learning_rate": 1.728034334025491e-07, + "loss": 0.0, + "num_input_tokens_seen": 15462096, + "step": 31410 + }, + { + "epoch": 4.146100039593507, + "grad_norm": 0.0004557797801680863, + "learning_rate": 1.7254464908498156e-07, + "loss": 0.0511, + "num_input_tokens_seen": 15464720, + "step": 31415 + }, + { + "epoch": 4.146759931371255, + "grad_norm": 0.00030610704561695457, + "learning_rate": 1.7228604038882e-07, + "loss": 0.0003, + "num_input_tokens_seen": 15467024, + "step": 31420 + }, + { + "epoch": 4.147419823149003, + "grad_norm": 2.5527537218295038e-05, + "learning_rate": 1.720276073689525e-07, + "loss": 0.0, + "num_input_tokens_seen": 15469520, + "step": 31425 + }, + { + "epoch": 4.148079714926752, + "grad_norm": 0.005444636568427086, + "learning_rate": 1.7176935008022986e-07, + "loss": 0.0411, + "num_input_tokens_seen": 15471824, + "step": 31430 + }, + { + "epoch": 4.148739606704501, + "grad_norm": 0.0002152034139726311, + "learning_rate": 1.715112685774649e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15474000, + "step": 31435 + }, + { + "epoch": 4.149399498482249, + "grad_norm": 0.000859928026329726, + "learning_rate": 1.7125336291543368e-07, + "loss": 0.0, + "num_input_tokens_seen": 15476560, + "step": 31440 + }, + { + "epoch": 4.150059390259997, + "grad_norm": 0.0003301157266832888, + "learning_rate": 1.7099563314887498e-07, + "loss": 0.0426, + "num_input_tokens_seen": 15478736, + "step": 31445 + }, + { + "epoch": 4.150719282037746, + "grad_norm": 0.009691378101706505, + "learning_rate": 1.7073807933249008e-07, + "loss": 0.0, + "num_input_tokens_seen": 15480976, + "step": 31450 + }, + { + "epoch": 4.151379173815494, + "grad_norm": 0.0030957702547311783, + "learning_rate": 1.7048070152094263e-07, + "loss": 0.0595, + "num_input_tokens_seen": 15483536, + "step": 31455 + }, + { + "epoch": 4.152039065593243, + "grad_norm": 0.0003932028484996408, + "learning_rate": 1.7022349976885941e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15486032, + "step": 31460 + }, + { + "epoch": 4.152698957370991, + "grad_norm": 12.39704418182373, + "learning_rate": 1.6996647413082977e-07, + "loss": 0.0519, + "num_input_tokens_seen": 15488912, + "step": 31465 + }, + { + "epoch": 4.15335884914874, + "grad_norm": 0.015966031700372696, + "learning_rate": 1.6970962466140514e-07, + "loss": 0.0, + "num_input_tokens_seen": 15491408, + "step": 31470 + }, + { + "epoch": 4.154018740926488, + "grad_norm": 0.00011750247358577326, + "learning_rate": 1.6945295141510018e-07, + "loss": 0.0, + "num_input_tokens_seen": 15493776, + "step": 31475 + }, + { + "epoch": 4.154678632704236, + "grad_norm": 8.13830629340373e-05, + "learning_rate": 1.691964544463922e-07, + "loss": 0.0, + "num_input_tokens_seen": 15496272, + "step": 31480 + }, + { + "epoch": 4.155338524481985, + "grad_norm": 0.00031953773577697575, + "learning_rate": 1.6894013380972028e-07, + "loss": 0.0, + "num_input_tokens_seen": 15498512, + "step": 31485 + }, + { + "epoch": 4.155998416259734, + "grad_norm": 0.008766383863985538, + "learning_rate": 1.6868398955948693e-07, + "loss": 0.0, + "num_input_tokens_seen": 15501008, + "step": 31490 + }, + { + "epoch": 4.156658308037482, + "grad_norm": 0.061419978737831116, + "learning_rate": 1.684280217500569e-07, + "loss": 0.0, + "num_input_tokens_seen": 15503312, + "step": 31495 + }, + { + "epoch": 4.15731819981523, + "grad_norm": 0.6979484558105469, + "learning_rate": 1.6817223043575768e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15506000, + "step": 31500 + }, + { + "epoch": 4.1579780915929785, + "grad_norm": 0.00016205478459596634, + "learning_rate": 1.6791661567087888e-07, + "loss": 0.0253, + "num_input_tokens_seen": 15508752, + "step": 31505 + }, + { + "epoch": 4.158637983370728, + "grad_norm": 0.07795727998018265, + "learning_rate": 1.6766117750967244e-07, + "loss": 0.0, + "num_input_tokens_seen": 15511440, + "step": 31510 + }, + { + "epoch": 4.159297875148476, + "grad_norm": 2.1538477085414343e-05, + "learning_rate": 1.6740591600635433e-07, + "loss": 0.0, + "num_input_tokens_seen": 15513808, + "step": 31515 + }, + { + "epoch": 4.159957766926224, + "grad_norm": 0.00036920199636369944, + "learning_rate": 1.671508312151011e-07, + "loss": 0.0, + "num_input_tokens_seen": 15516496, + "step": 31520 + }, + { + "epoch": 4.1606176587039725, + "grad_norm": 6.141668563941494e-05, + "learning_rate": 1.6689592319005296e-07, + "loss": 0.0645, + "num_input_tokens_seen": 15519056, + "step": 31525 + }, + { + "epoch": 4.161277550481721, + "grad_norm": 0.0010340113658457994, + "learning_rate": 1.6664119198531245e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15521104, + "step": 31530 + }, + { + "epoch": 4.161937442259469, + "grad_norm": 0.002041921252384782, + "learning_rate": 1.6638663765494398e-07, + "loss": 0.0294, + "num_input_tokens_seen": 15523344, + "step": 31535 + }, + { + "epoch": 4.162597334037218, + "grad_norm": 0.00012479268480092287, + "learning_rate": 1.6613226025297545e-07, + "loss": 0.0, + "num_input_tokens_seen": 15525840, + "step": 31540 + }, + { + "epoch": 4.1632572258149665, + "grad_norm": 0.0012602178612723947, + "learning_rate": 1.6587805983339564e-07, + "loss": 0.0, + "num_input_tokens_seen": 15528144, + "step": 31545 + }, + { + "epoch": 4.163917117592715, + "grad_norm": 0.0001506891567260027, + "learning_rate": 1.65624036450158e-07, + "loss": 0.0, + "num_input_tokens_seen": 15530512, + "step": 31550 + }, + { + "epoch": 4.164577009370463, + "grad_norm": 0.00014679189189337194, + "learning_rate": 1.6537019015717647e-07, + "loss": 0.0, + "num_input_tokens_seen": 15532880, + "step": 31555 + }, + { + "epoch": 4.165236901148211, + "grad_norm": 0.0002842925605364144, + "learning_rate": 1.6511652100832797e-07, + "loss": 0.0, + "num_input_tokens_seen": 15535440, + "step": 31560 + }, + { + "epoch": 4.1658967929259605, + "grad_norm": 0.00048440933460369706, + "learning_rate": 1.648630290574522e-07, + "loss": 0.0, + "num_input_tokens_seen": 15538000, + "step": 31565 + }, + { + "epoch": 4.166556684703709, + "grad_norm": 0.00023747571685817093, + "learning_rate": 1.646097143583508e-07, + "loss": 0.02, + "num_input_tokens_seen": 15540688, + "step": 31570 + }, + { + "epoch": 4.167216576481457, + "grad_norm": 5.907857484999113e-05, + "learning_rate": 1.6435657696478844e-07, + "loss": 0.0252, + "num_input_tokens_seen": 15543120, + "step": 31575 + }, + { + "epoch": 4.167876468259205, + "grad_norm": 0.002017110353335738, + "learning_rate": 1.6410361693049114e-07, + "loss": 0.0112, + "num_input_tokens_seen": 15545232, + "step": 31580 + }, + { + "epoch": 4.168536360036954, + "grad_norm": 0.006705759093165398, + "learning_rate": 1.6385083430914792e-07, + "loss": 0.0, + "num_input_tokens_seen": 15547920, + "step": 31585 + }, + { + "epoch": 4.169196251814703, + "grad_norm": 0.00014904749696142972, + "learning_rate": 1.6359822915441058e-07, + "loss": 0.0456, + "num_input_tokens_seen": 15550224, + "step": 31590 + }, + { + "epoch": 4.169856143592451, + "grad_norm": 0.09349919855594635, + "learning_rate": 1.6334580151989207e-07, + "loss": 0.0, + "num_input_tokens_seen": 15552656, + "step": 31595 + }, + { + "epoch": 4.170516035370199, + "grad_norm": 0.004773380700498819, + "learning_rate": 1.630935514591686e-07, + "loss": 0.0, + "num_input_tokens_seen": 15555280, + "step": 31600 + }, + { + "epoch": 4.171175927147948, + "grad_norm": 0.054015111178159714, + "learning_rate": 1.6284147902577872e-07, + "loss": 0.0, + "num_input_tokens_seen": 15557776, + "step": 31605 + }, + { + "epoch": 4.171835818925696, + "grad_norm": 0.00047588403685949743, + "learning_rate": 1.6258958427322234e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15560208, + "step": 31610 + }, + { + "epoch": 4.172495710703445, + "grad_norm": 0.266117125749588, + "learning_rate": 1.623378672549628e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15562768, + "step": 31615 + }, + { + "epoch": 4.173155602481193, + "grad_norm": 0.00938863679766655, + "learning_rate": 1.620863280244249e-07, + "loss": 0.0, + "num_input_tokens_seen": 15565328, + "step": 31620 + }, + { + "epoch": 4.173815494258942, + "grad_norm": 0.00039532931987196207, + "learning_rate": 1.6183496663499652e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15567632, + "step": 31625 + }, + { + "epoch": 4.17447538603669, + "grad_norm": 0.00045435517677105963, + "learning_rate": 1.6158378314002673e-07, + "loss": 0.0, + "num_input_tokens_seen": 15570064, + "step": 31630 + }, + { + "epoch": 4.175135277814438, + "grad_norm": 0.0060187773779034615, + "learning_rate": 1.613327775928276e-07, + "loss": 0.0, + "num_input_tokens_seen": 15572624, + "step": 31635 + }, + { + "epoch": 4.175795169592186, + "grad_norm": 0.001109470147639513, + "learning_rate": 1.6108195004667357e-07, + "loss": 0.0, + "num_input_tokens_seen": 15574672, + "step": 31640 + }, + { + "epoch": 4.176455061369936, + "grad_norm": 0.26625481247901917, + "learning_rate": 1.6083130055480033e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15577488, + "step": 31645 + }, + { + "epoch": 4.177114953147684, + "grad_norm": 6.225990364328027e-05, + "learning_rate": 1.6058082917040682e-07, + "loss": 0.0, + "num_input_tokens_seen": 15579920, + "step": 31650 + }, + { + "epoch": 4.177774844925432, + "grad_norm": 0.0013030472910031676, + "learning_rate": 1.6033053594665402e-07, + "loss": 0.0, + "num_input_tokens_seen": 15582224, + "step": 31655 + }, + { + "epoch": 4.17843473670318, + "grad_norm": 0.00037867153878323734, + "learning_rate": 1.6008042093666428e-07, + "loss": 0.0, + "num_input_tokens_seen": 15584656, + "step": 31660 + }, + { + "epoch": 4.179094628480929, + "grad_norm": 0.0003713365877047181, + "learning_rate": 1.5983048419352297e-07, + "loss": 0.0, + "num_input_tokens_seen": 15587024, + "step": 31665 + }, + { + "epoch": 4.179754520258678, + "grad_norm": 0.17333261668682098, + "learning_rate": 1.5958072577027738e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15589648, + "step": 31670 + }, + { + "epoch": 4.180414412036426, + "grad_norm": 0.002129318891093135, + "learning_rate": 1.5933114571993712e-07, + "loss": 0.0, + "num_input_tokens_seen": 15592464, + "step": 31675 + }, + { + "epoch": 4.181074303814174, + "grad_norm": 0.00019260341650806367, + "learning_rate": 1.5908174409547347e-07, + "loss": 0.0381, + "num_input_tokens_seen": 15595024, + "step": 31680 + }, + { + "epoch": 4.181734195591923, + "grad_norm": 0.7180609107017517, + "learning_rate": 1.588325209498198e-07, + "loss": 0.0677, + "num_input_tokens_seen": 15597648, + "step": 31685 + }, + { + "epoch": 4.182394087369671, + "grad_norm": 6.745587597833946e-05, + "learning_rate": 1.5858347633587277e-07, + "loss": 0.0, + "num_input_tokens_seen": 15600208, + "step": 31690 + }, + { + "epoch": 4.18305397914742, + "grad_norm": 0.002034937497228384, + "learning_rate": 1.5833461030648954e-07, + "loss": 0.0, + "num_input_tokens_seen": 15602768, + "step": 31695 + }, + { + "epoch": 4.183713870925168, + "grad_norm": 0.08714821189641953, + "learning_rate": 1.5808592291449074e-07, + "loss": 0.0207, + "num_input_tokens_seen": 15605456, + "step": 31700 + }, + { + "epoch": 4.184373762702917, + "grad_norm": 0.0014131611678749323, + "learning_rate": 1.5783741421265784e-07, + "loss": 0.0003, + "num_input_tokens_seen": 15608016, + "step": 31705 + }, + { + "epoch": 4.185033654480665, + "grad_norm": 0.0023685896303504705, + "learning_rate": 1.575890842537353e-07, + "loss": 0.0, + "num_input_tokens_seen": 15610256, + "step": 31710 + }, + { + "epoch": 4.185693546258413, + "grad_norm": 0.0001617130619706586, + "learning_rate": 1.573409330904296e-07, + "loss": 0.0, + "num_input_tokens_seen": 15612688, + "step": 31715 + }, + { + "epoch": 4.1863534380361624, + "grad_norm": 0.059925127774477005, + "learning_rate": 1.5709296077540835e-07, + "loss": 0.0579, + "num_input_tokens_seen": 15615376, + "step": 31720 + }, + { + "epoch": 4.187013329813911, + "grad_norm": 0.01801411621272564, + "learning_rate": 1.5684516736130283e-07, + "loss": 0.0441, + "num_input_tokens_seen": 15617680, + "step": 31725 + }, + { + "epoch": 4.187673221591659, + "grad_norm": 0.0002701185003388673, + "learning_rate": 1.5659755290070453e-07, + "loss": 0.0, + "num_input_tokens_seen": 15620432, + "step": 31730 + }, + { + "epoch": 4.188333113369407, + "grad_norm": 0.0005498105310834944, + "learning_rate": 1.5635011744616854e-07, + "loss": 0.0, + "num_input_tokens_seen": 15622736, + "step": 31735 + }, + { + "epoch": 4.188993005147156, + "grad_norm": 0.0004175195062998682, + "learning_rate": 1.5610286105021063e-07, + "loss": 0.0, + "num_input_tokens_seen": 15625424, + "step": 31740 + }, + { + "epoch": 4.189652896924905, + "grad_norm": 0.0007348982035182416, + "learning_rate": 1.5585578376530938e-07, + "loss": 0.0003, + "num_input_tokens_seen": 15627920, + "step": 31745 + }, + { + "epoch": 4.190312788702653, + "grad_norm": 2.787469929899089e-05, + "learning_rate": 1.556088856439055e-07, + "loss": 0.0, + "num_input_tokens_seen": 15630352, + "step": 31750 + }, + { + "epoch": 4.190972680480401, + "grad_norm": 2.5024770366144367e-05, + "learning_rate": 1.5536216673840084e-07, + "loss": 0.0518, + "num_input_tokens_seen": 15632848, + "step": 31755 + }, + { + "epoch": 4.19163257225815, + "grad_norm": 0.018026202917099, + "learning_rate": 1.551156271011599e-07, + "loss": 0.0, + "num_input_tokens_seen": 15635344, + "step": 31760 + }, + { + "epoch": 4.192292464035898, + "grad_norm": 0.0006465193582698703, + "learning_rate": 1.5486926678450907e-07, + "loss": 0.0, + "num_input_tokens_seen": 15637840, + "step": 31765 + }, + { + "epoch": 4.192952355813647, + "grad_norm": 0.002082411665469408, + "learning_rate": 1.5462308584073625e-07, + "loss": 0.0, + "num_input_tokens_seen": 15640272, + "step": 31770 + }, + { + "epoch": 4.193612247591395, + "grad_norm": 0.0004274914681445807, + "learning_rate": 1.5437708432209174e-07, + "loss": 0.0, + "num_input_tokens_seen": 15642832, + "step": 31775 + }, + { + "epoch": 4.194272139369144, + "grad_norm": 0.0006353407516144216, + "learning_rate": 1.5413126228078755e-07, + "loss": 0.0, + "num_input_tokens_seen": 15645136, + "step": 31780 + }, + { + "epoch": 4.194932031146892, + "grad_norm": 7.021045166766271e-05, + "learning_rate": 1.5388561976899784e-07, + "loss": 0.0, + "num_input_tokens_seen": 15647376, + "step": 31785 + }, + { + "epoch": 4.19559192292464, + "grad_norm": 0.0003200802602805197, + "learning_rate": 1.53640156838858e-07, + "loss": 0.0, + "num_input_tokens_seen": 15649616, + "step": 31790 + }, + { + "epoch": 4.196251814702388, + "grad_norm": 0.000181476934812963, + "learning_rate": 1.5339487354246605e-07, + "loss": 0.0, + "num_input_tokens_seen": 15652048, + "step": 31795 + }, + { + "epoch": 4.196911706480138, + "grad_norm": 0.9393686652183533, + "learning_rate": 1.5314976993188177e-07, + "loss": 0.001, + "num_input_tokens_seen": 15654288, + "step": 31800 + }, + { + "epoch": 4.197571598257886, + "grad_norm": 0.0006523782503791153, + "learning_rate": 1.5290484605912624e-07, + "loss": 0.0, + "num_input_tokens_seen": 15656784, + "step": 31805 + }, + { + "epoch": 4.198231490035634, + "grad_norm": 0.0040655615739524364, + "learning_rate": 1.5266010197618296e-07, + "loss": 0.0, + "num_input_tokens_seen": 15659536, + "step": 31810 + }, + { + "epoch": 4.198891381813382, + "grad_norm": 2.516552209854126, + "learning_rate": 1.5241553773499727e-07, + "loss": 0.001, + "num_input_tokens_seen": 15661776, + "step": 31815 + }, + { + "epoch": 4.199551273591131, + "grad_norm": 0.0005888799205422401, + "learning_rate": 1.5217115338747577e-07, + "loss": 0.0, + "num_input_tokens_seen": 15664208, + "step": 31820 + }, + { + "epoch": 4.20021116536888, + "grad_norm": 0.0015442796284332871, + "learning_rate": 1.5192694898548742e-07, + "loss": 0.0132, + "num_input_tokens_seen": 15666576, + "step": 31825 + }, + { + "epoch": 4.200871057146628, + "grad_norm": 0.0013671378837898374, + "learning_rate": 1.5168292458086286e-07, + "loss": 0.0, + "num_input_tokens_seen": 15668880, + "step": 31830 + }, + { + "epoch": 4.201530948924376, + "grad_norm": 0.0011779994238168001, + "learning_rate": 1.5143908022539487e-07, + "loss": 0.028, + "num_input_tokens_seen": 15671120, + "step": 31835 + }, + { + "epoch": 4.202190840702125, + "grad_norm": 0.00035011785803362727, + "learning_rate": 1.5119541597083718e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15673424, + "step": 31840 + }, + { + "epoch": 4.202850732479873, + "grad_norm": 0.0017878002254292369, + "learning_rate": 1.5095193186890554e-07, + "loss": 0.0, + "num_input_tokens_seen": 15676112, + "step": 31845 + }, + { + "epoch": 4.203510624257622, + "grad_norm": 0.42923977971076965, + "learning_rate": 1.5070862797127847e-07, + "loss": 0.0006, + "num_input_tokens_seen": 15678608, + "step": 31850 + }, + { + "epoch": 4.20417051603537, + "grad_norm": 0.000863776367623359, + "learning_rate": 1.504655043295948e-07, + "loss": 0.0074, + "num_input_tokens_seen": 15680976, + "step": 31855 + }, + { + "epoch": 4.204830407813119, + "grad_norm": 0.000916738819796592, + "learning_rate": 1.5022256099545594e-07, + "loss": 0.0, + "num_input_tokens_seen": 15683280, + "step": 31860 + }, + { + "epoch": 4.205490299590867, + "grad_norm": 0.00029176438692957163, + "learning_rate": 1.4997979802042515e-07, + "loss": 0.0, + "num_input_tokens_seen": 15685648, + "step": 31865 + }, + { + "epoch": 4.206150191368615, + "grad_norm": 4.51797604910098e-05, + "learning_rate": 1.4973721545602668e-07, + "loss": 0.0, + "num_input_tokens_seen": 15688272, + "step": 31870 + }, + { + "epoch": 4.206810083146364, + "grad_norm": 0.008635712787508965, + "learning_rate": 1.4949481335374736e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15690768, + "step": 31875 + }, + { + "epoch": 4.207469974924113, + "grad_norm": 0.00044233829248696566, + "learning_rate": 1.4925259176503446e-07, + "loss": 0.0, + "num_input_tokens_seen": 15693456, + "step": 31880 + }, + { + "epoch": 4.208129866701861, + "grad_norm": 7.04431367921643e-05, + "learning_rate": 1.4901055074129888e-07, + "loss": 0.0, + "num_input_tokens_seen": 15695888, + "step": 31885 + }, + { + "epoch": 4.208789758479609, + "grad_norm": 0.02665085531771183, + "learning_rate": 1.487686903339115e-07, + "loss": 0.0, + "num_input_tokens_seen": 15698064, + "step": 31890 + }, + { + "epoch": 4.2094496502573575, + "grad_norm": 3.517913137329742e-05, + "learning_rate": 1.4852701059420526e-07, + "loss": 0.0, + "num_input_tokens_seen": 15700368, + "step": 31895 + }, + { + "epoch": 4.210109542035106, + "grad_norm": 0.14689721167087555, + "learning_rate": 1.4828551157347514e-07, + "loss": 0.0, + "num_input_tokens_seen": 15702864, + "step": 31900 + }, + { + "epoch": 4.210769433812855, + "grad_norm": 0.0003207788977306336, + "learning_rate": 1.4804419332297746e-07, + "loss": 0.0, + "num_input_tokens_seen": 15705104, + "step": 31905 + }, + { + "epoch": 4.211429325590603, + "grad_norm": 0.0017935391515493393, + "learning_rate": 1.478030558939307e-07, + "loss": 0.0, + "num_input_tokens_seen": 15707344, + "step": 31910 + }, + { + "epoch": 4.2120892173683515, + "grad_norm": 0.0030650501139461994, + "learning_rate": 1.4756209933751396e-07, + "loss": 0.0, + "num_input_tokens_seen": 15709904, + "step": 31915 + }, + { + "epoch": 4.2127491091461, + "grad_norm": 6.088883674237877e-05, + "learning_rate": 1.4732132370486872e-07, + "loss": 0.0, + "num_input_tokens_seen": 15712272, + "step": 31920 + }, + { + "epoch": 4.213409000923848, + "grad_norm": 0.042080190032720566, + "learning_rate": 1.4708072904709812e-07, + "loss": 0.0, + "num_input_tokens_seen": 15714896, + "step": 31925 + }, + { + "epoch": 4.214068892701597, + "grad_norm": 0.00020239691366441548, + "learning_rate": 1.468403154152663e-07, + "loss": 0.0011, + "num_input_tokens_seen": 15717456, + "step": 31930 + }, + { + "epoch": 4.2147287844793455, + "grad_norm": 0.0012077669380232692, + "learning_rate": 1.4660008286039937e-07, + "loss": 0.0113, + "num_input_tokens_seen": 15720016, + "step": 31935 + }, + { + "epoch": 4.215388676257094, + "grad_norm": 0.0007291028741747141, + "learning_rate": 1.4636003143348518e-07, + "loss": 0.0, + "num_input_tokens_seen": 15722320, + "step": 31940 + }, + { + "epoch": 4.216048568034842, + "grad_norm": 0.0003883135796058923, + "learning_rate": 1.4612016118547265e-07, + "loss": 0.0, + "num_input_tokens_seen": 15724816, + "step": 31945 + }, + { + "epoch": 4.21670845981259, + "grad_norm": 0.00250708544626832, + "learning_rate": 1.4588047216727251e-07, + "loss": 0.0396, + "num_input_tokens_seen": 15727440, + "step": 31950 + }, + { + "epoch": 4.2173683515903395, + "grad_norm": 0.00013595109339803457, + "learning_rate": 1.4564096442975715e-07, + "loss": 0.0, + "num_input_tokens_seen": 15729744, + "step": 31955 + }, + { + "epoch": 4.218028243368088, + "grad_norm": 1.639278889342677e-05, + "learning_rate": 1.454016380237605e-07, + "loss": 0.0, + "num_input_tokens_seen": 15732304, + "step": 31960 + }, + { + "epoch": 4.218688135145836, + "grad_norm": 0.0030679525807499886, + "learning_rate": 1.4516249300007743e-07, + "loss": 0.0, + "num_input_tokens_seen": 15734608, + "step": 31965 + }, + { + "epoch": 4.219348026923584, + "grad_norm": 0.002188972430303693, + "learning_rate": 1.4492352940946506e-07, + "loss": 0.0, + "num_input_tokens_seen": 15736976, + "step": 31970 + }, + { + "epoch": 4.220007918701333, + "grad_norm": 0.002761758165434003, + "learning_rate": 1.4468474730264168e-07, + "loss": 0.0019, + "num_input_tokens_seen": 15739664, + "step": 31975 + }, + { + "epoch": 4.220667810479082, + "grad_norm": 0.0013528363779187202, + "learning_rate": 1.4444614673028687e-07, + "loss": 0.0, + "num_input_tokens_seen": 15742096, + "step": 31980 + }, + { + "epoch": 4.22132770225683, + "grad_norm": 7.338653085753322e-05, + "learning_rate": 1.442077277430419e-07, + "loss": 0.0, + "num_input_tokens_seen": 15744464, + "step": 31985 + }, + { + "epoch": 4.221987594034578, + "grad_norm": 0.0034586521796882153, + "learning_rate": 1.4396949039150984e-07, + "loss": 0.0, + "num_input_tokens_seen": 15746896, + "step": 31990 + }, + { + "epoch": 4.222647485812327, + "grad_norm": 0.00011525737500051036, + "learning_rate": 1.4373143472625438e-07, + "loss": 0.0, + "num_input_tokens_seen": 15749200, + "step": 31995 + }, + { + "epoch": 4.223307377590075, + "grad_norm": 0.0010359887965023518, + "learning_rate": 1.4349356079780116e-07, + "loss": 0.0, + "num_input_tokens_seen": 15751696, + "step": 32000 + }, + { + "epoch": 4.223967269367824, + "grad_norm": 13.788939476013184, + "learning_rate": 1.432558686566374e-07, + "loss": 0.0308, + "num_input_tokens_seen": 15754256, + "step": 32005 + }, + { + "epoch": 4.224627161145572, + "grad_norm": 0.00019276590319350362, + "learning_rate": 1.4301835835321175e-07, + "loss": 0.0323, + "num_input_tokens_seen": 15757008, + "step": 32010 + }, + { + "epoch": 4.225287052923321, + "grad_norm": 0.0015638087643310428, + "learning_rate": 1.4278102993793362e-07, + "loss": 0.0, + "num_input_tokens_seen": 15759312, + "step": 32015 + }, + { + "epoch": 4.225946944701069, + "grad_norm": 0.00024061364820227027, + "learning_rate": 1.4254388346117408e-07, + "loss": 0.0, + "num_input_tokens_seen": 15761616, + "step": 32020 + }, + { + "epoch": 4.226606836478817, + "grad_norm": 0.00010773177200462669, + "learning_rate": 1.423069189732664e-07, + "loss": 0.0, + "num_input_tokens_seen": 15764176, + "step": 32025 + }, + { + "epoch": 4.227266728256566, + "grad_norm": 0.009924034588038921, + "learning_rate": 1.4207013652450405e-07, + "loss": 0.0042, + "num_input_tokens_seen": 15766736, + "step": 32030 + }, + { + "epoch": 4.227926620034315, + "grad_norm": 0.00042953903903253376, + "learning_rate": 1.4183353616514293e-07, + "loss": 0.0023, + "num_input_tokens_seen": 15769424, + "step": 32035 + }, + { + "epoch": 4.228586511812063, + "grad_norm": 0.006489480845630169, + "learning_rate": 1.415971179453991e-07, + "loss": 0.0, + "num_input_tokens_seen": 15772240, + "step": 32040 + }, + { + "epoch": 4.229246403589811, + "grad_norm": 0.0013794874539598823, + "learning_rate": 1.4136088191545083e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15774608, + "step": 32045 + }, + { + "epoch": 4.2299062953675595, + "grad_norm": 0.027372226119041443, + "learning_rate": 1.411248281254379e-07, + "loss": 0.0, + "num_input_tokens_seen": 15777040, + "step": 32050 + }, + { + "epoch": 4.230566187145308, + "grad_norm": 0.00011632290261331946, + "learning_rate": 1.408889566254603e-07, + "loss": 0.0, + "num_input_tokens_seen": 15779472, + "step": 32055 + }, + { + "epoch": 4.231226078923057, + "grad_norm": 0.0005008972948417068, + "learning_rate": 1.4065326746558092e-07, + "loss": 0.0, + "num_input_tokens_seen": 15781904, + "step": 32060 + }, + { + "epoch": 4.231885970700805, + "grad_norm": 0.03096534125506878, + "learning_rate": 1.4041776069582233e-07, + "loss": 0.0, + "num_input_tokens_seen": 15784592, + "step": 32065 + }, + { + "epoch": 4.2325458624785535, + "grad_norm": 0.00018453155644237995, + "learning_rate": 1.4018243636616967e-07, + "loss": 0.0, + "num_input_tokens_seen": 15787024, + "step": 32070 + }, + { + "epoch": 4.233205754256302, + "grad_norm": 0.000387275853427127, + "learning_rate": 1.399472945265684e-07, + "loss": 0.0, + "num_input_tokens_seen": 15789456, + "step": 32075 + }, + { + "epoch": 4.23386564603405, + "grad_norm": 0.0003460666921455413, + "learning_rate": 1.397123352269257e-07, + "loss": 0.0176, + "num_input_tokens_seen": 15791888, + "step": 32080 + }, + { + "epoch": 4.234525537811799, + "grad_norm": 0.0038513634353876114, + "learning_rate": 1.3947755851711053e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15794128, + "step": 32085 + }, + { + "epoch": 4.2351854295895475, + "grad_norm": 0.021419484168291092, + "learning_rate": 1.3924296444695194e-07, + "loss": 0.0, + "num_input_tokens_seen": 15796304, + "step": 32090 + }, + { + "epoch": 4.235845321367296, + "grad_norm": 0.021339669823646545, + "learning_rate": 1.3900855306624093e-07, + "loss": 0.0, + "num_input_tokens_seen": 15798800, + "step": 32095 + }, + { + "epoch": 4.236505213145044, + "grad_norm": 0.0009724851697683334, + "learning_rate": 1.387743244247299e-07, + "loss": 0.0007, + "num_input_tokens_seen": 15801424, + "step": 32100 + }, + { + "epoch": 4.237165104922792, + "grad_norm": 0.003946192096918821, + "learning_rate": 1.385402785721319e-07, + "loss": 0.0, + "num_input_tokens_seen": 15804240, + "step": 32105 + }, + { + "epoch": 4.2378249967005415, + "grad_norm": 1.2278902431717142e-05, + "learning_rate": 1.3830641555812162e-07, + "loss": 0.0, + "num_input_tokens_seen": 15806544, + "step": 32110 + }, + { + "epoch": 4.23848488847829, + "grad_norm": 0.3208927512168884, + "learning_rate": 1.3807273543233466e-07, + "loss": 0.0268, + "num_input_tokens_seen": 15809552, + "step": 32115 + }, + { + "epoch": 4.239144780256038, + "grad_norm": 0.19890807569026947, + "learning_rate": 1.3783923824436817e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15811984, + "step": 32120 + }, + { + "epoch": 4.239804672033786, + "grad_norm": 0.00046882135211490095, + "learning_rate": 1.3760592404377991e-07, + "loss": 0.0, + "num_input_tokens_seen": 15814608, + "step": 32125 + }, + { + "epoch": 4.240464563811535, + "grad_norm": 0.000249588891165331, + "learning_rate": 1.373727928800894e-07, + "loss": 0.1054, + "num_input_tokens_seen": 15817040, + "step": 32130 + }, + { + "epoch": 4.241124455589284, + "grad_norm": 9.873955726623535, + "learning_rate": 1.3713984480277708e-07, + "loss": 0.0323, + "num_input_tokens_seen": 15819600, + "step": 32135 + }, + { + "epoch": 4.241784347367032, + "grad_norm": 0.00012521083408501, + "learning_rate": 1.3690707986128414e-07, + "loss": 0.0, + "num_input_tokens_seen": 15822608, + "step": 32140 + }, + { + "epoch": 4.24244423914478, + "grad_norm": 0.06816807389259338, + "learning_rate": 1.3667449810501353e-07, + "loss": 0.0, + "num_input_tokens_seen": 15825360, + "step": 32145 + }, + { + "epoch": 4.243104130922529, + "grad_norm": 8.7230589997489e-05, + "learning_rate": 1.3644209958332908e-07, + "loss": 0.0, + "num_input_tokens_seen": 15827792, + "step": 32150 + }, + { + "epoch": 4.243764022700277, + "grad_norm": 0.00036624076892621815, + "learning_rate": 1.3620988434555546e-07, + "loss": 0.0253, + "num_input_tokens_seen": 15830224, + "step": 32155 + }, + { + "epoch": 4.244423914478026, + "grad_norm": 2.8342570658423938e-05, + "learning_rate": 1.3597785244097882e-07, + "loss": 0.0381, + "num_input_tokens_seen": 15832720, + "step": 32160 + }, + { + "epoch": 4.245083806255774, + "grad_norm": 0.03173115476965904, + "learning_rate": 1.3574600391884627e-07, + "loss": 0.0, + "num_input_tokens_seen": 15835152, + "step": 32165 + }, + { + "epoch": 4.245743698033523, + "grad_norm": 0.0012725105043500662, + "learning_rate": 1.3551433882836615e-07, + "loss": 0.0, + "num_input_tokens_seen": 15837648, + "step": 32170 + }, + { + "epoch": 4.246403589811271, + "grad_norm": 0.004674192983657122, + "learning_rate": 1.3528285721870747e-07, + "loss": 0.0, + "num_input_tokens_seen": 15839888, + "step": 32175 + }, + { + "epoch": 4.247063481589019, + "grad_norm": 0.007815685123205185, + "learning_rate": 1.3505155913900012e-07, + "loss": 0.0, + "num_input_tokens_seen": 15842640, + "step": 32180 + }, + { + "epoch": 4.247723373366767, + "grad_norm": 14.349305152893066, + "learning_rate": 1.3482044463833632e-07, + "loss": 0.0411, + "num_input_tokens_seen": 15845072, + "step": 32185 + }, + { + "epoch": 4.248383265144517, + "grad_norm": 0.0006116251461207867, + "learning_rate": 1.3458951376576778e-07, + "loss": 0.0046, + "num_input_tokens_seen": 15847504, + "step": 32190 + }, + { + "epoch": 4.249043156922265, + "grad_norm": 0.001768477144651115, + "learning_rate": 1.343587665703082e-07, + "loss": 0.0, + "num_input_tokens_seen": 15850064, + "step": 32195 + }, + { + "epoch": 4.249703048700013, + "grad_norm": 0.004840330220758915, + "learning_rate": 1.341282031009321e-07, + "loss": 0.0, + "num_input_tokens_seen": 15852752, + "step": 32200 + }, + { + "epoch": 4.250362940477761, + "grad_norm": 15.46373462677002, + "learning_rate": 1.338978234065745e-07, + "loss": 0.0442, + "num_input_tokens_seen": 15855056, + "step": 32205 + }, + { + "epoch": 4.25102283225551, + "grad_norm": 9.834176063537598, + "learning_rate": 1.3366762753613236e-07, + "loss": 0.0143, + "num_input_tokens_seen": 15857488, + "step": 32210 + }, + { + "epoch": 4.251682724033259, + "grad_norm": 4.949681758880615, + "learning_rate": 1.3343761553846222e-07, + "loss": 0.0087, + "num_input_tokens_seen": 15859920, + "step": 32215 + }, + { + "epoch": 4.251682724033259, + "eval_loss": 0.24730534851551056, + "eval_runtime": 7.8134, + "eval_samples_per_second": 861.983, + "eval_steps_per_second": 107.764, + "num_input_tokens_seen": 15859920, + "step": 32215 + }, + { + "epoch": 4.252342615811007, + "grad_norm": 0.00021375197684392333, + "learning_rate": 1.332077874623836e-07, + "loss": 0.0, + "num_input_tokens_seen": 15862480, + "step": 32220 + }, + { + "epoch": 4.253002507588755, + "grad_norm": 1.4487995031231549e-05, + "learning_rate": 1.3297814335667523e-07, + "loss": 0.0577, + "num_input_tokens_seen": 15865296, + "step": 32225 + }, + { + "epoch": 4.253662399366504, + "grad_norm": 0.0018687325064092875, + "learning_rate": 1.3274868327007715e-07, + "loss": 0.0548, + "num_input_tokens_seen": 15867600, + "step": 32230 + }, + { + "epoch": 4.254322291144252, + "grad_norm": 2.8422791729099117e-05, + "learning_rate": 1.3251940725129108e-07, + "loss": 0.0122, + "num_input_tokens_seen": 15870032, + "step": 32235 + }, + { + "epoch": 4.254982182922001, + "grad_norm": 0.0015061901649460196, + "learning_rate": 1.3229031534897882e-07, + "loss": 0.0, + "num_input_tokens_seen": 15872464, + "step": 32240 + }, + { + "epoch": 4.255642074699749, + "grad_norm": 0.0064211683347821236, + "learning_rate": 1.320614076117641e-07, + "loss": 0.0, + "num_input_tokens_seen": 15874768, + "step": 32245 + }, + { + "epoch": 4.256301966477498, + "grad_norm": 0.0002376893098698929, + "learning_rate": 1.318326840882301e-07, + "loss": 0.0, + "num_input_tokens_seen": 15877136, + "step": 32250 + }, + { + "epoch": 4.256961858255246, + "grad_norm": 9.079680603463203e-05, + "learning_rate": 1.3160414482692217e-07, + "loss": 0.0, + "num_input_tokens_seen": 15879312, + "step": 32255 + }, + { + "epoch": 4.257621750032994, + "grad_norm": 0.0030448322650045156, + "learning_rate": 1.3137578987634635e-07, + "loss": 0.0, + "num_input_tokens_seen": 15881936, + "step": 32260 + }, + { + "epoch": 4.258281641810743, + "grad_norm": 0.00842120312154293, + "learning_rate": 1.3114761928496875e-07, + "loss": 0.0, + "num_input_tokens_seen": 15884240, + "step": 32265 + }, + { + "epoch": 4.258941533588492, + "grad_norm": 0.00013276837125886232, + "learning_rate": 1.3091963310121734e-07, + "loss": 0.001, + "num_input_tokens_seen": 15886736, + "step": 32270 + }, + { + "epoch": 4.25960142536624, + "grad_norm": 0.00280931917950511, + "learning_rate": 1.306918313734805e-07, + "loss": 0.0, + "num_input_tokens_seen": 15888976, + "step": 32275 + }, + { + "epoch": 4.260261317143988, + "grad_norm": 0.00019990344299003482, + "learning_rate": 1.3046421415010732e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15891088, + "step": 32280 + }, + { + "epoch": 4.2609212089217365, + "grad_norm": 0.01581178978085518, + "learning_rate": 1.3023678147940797e-07, + "loss": 0.0, + "num_input_tokens_seen": 15893712, + "step": 32285 + }, + { + "epoch": 4.261581100699486, + "grad_norm": 0.00010969245340675116, + "learning_rate": 1.3000953340965336e-07, + "loss": 0.0213, + "num_input_tokens_seen": 15896144, + "step": 32290 + }, + { + "epoch": 4.262240992477234, + "grad_norm": 0.0008918251842260361, + "learning_rate": 1.297824699890756e-07, + "loss": 0.0, + "num_input_tokens_seen": 15898640, + "step": 32295 + }, + { + "epoch": 4.262900884254982, + "grad_norm": 9.960948955267668e-05, + "learning_rate": 1.2955559126586667e-07, + "loss": 0.0, + "num_input_tokens_seen": 15901008, + "step": 32300 + }, + { + "epoch": 4.2635607760327305, + "grad_norm": 0.0014763657236471772, + "learning_rate": 1.293288972881803e-07, + "loss": 0.0, + "num_input_tokens_seen": 15903696, + "step": 32305 + }, + { + "epoch": 4.264220667810479, + "grad_norm": 0.08612053841352463, + "learning_rate": 1.2910238810413075e-07, + "loss": 0.0, + "num_input_tokens_seen": 15906128, + "step": 32310 + }, + { + "epoch": 4.264880559588228, + "grad_norm": 0.015839478000998497, + "learning_rate": 1.2887606376179262e-07, + "loss": 0.0, + "num_input_tokens_seen": 15908624, + "step": 32315 + }, + { + "epoch": 4.265540451365976, + "grad_norm": 0.00016956948093138635, + "learning_rate": 1.2864992430920164e-07, + "loss": 0.0001, + "num_input_tokens_seen": 15910864, + "step": 32320 + }, + { + "epoch": 4.2662003431437245, + "grad_norm": 3.184717570547946e-05, + "learning_rate": 1.2842396979435476e-07, + "loss": 0.0004, + "num_input_tokens_seen": 15913296, + "step": 32325 + }, + { + "epoch": 4.266860234921473, + "grad_norm": 1.2898004570160992e-05, + "learning_rate": 1.2819820026520856e-07, + "loss": 0.0, + "num_input_tokens_seen": 15915792, + "step": 32330 + }, + { + "epoch": 4.267520126699221, + "grad_norm": 0.0007911850116215646, + "learning_rate": 1.2797261576968133e-07, + "loss": 0.0, + "num_input_tokens_seen": 15917968, + "step": 32335 + }, + { + "epoch": 4.268180018476969, + "grad_norm": 0.0031948827672749758, + "learning_rate": 1.2774721635565156e-07, + "loss": 0.0, + "num_input_tokens_seen": 15920656, + "step": 32340 + }, + { + "epoch": 4.2688399102547185, + "grad_norm": 0.0011284436332061887, + "learning_rate": 1.275220020709591e-07, + "loss": 0.0, + "num_input_tokens_seen": 15923024, + "step": 32345 + }, + { + "epoch": 4.269499802032467, + "grad_norm": 0.0007449170225299895, + "learning_rate": 1.2729697296340358e-07, + "loss": 0.0503, + "num_input_tokens_seen": 15925328, + "step": 32350 + }, + { + "epoch": 4.270159693810215, + "grad_norm": 0.0018473287345841527, + "learning_rate": 1.270721290807456e-07, + "loss": 0.0, + "num_input_tokens_seen": 15927760, + "step": 32355 + }, + { + "epoch": 4.270819585587963, + "grad_norm": 0.08810362219810486, + "learning_rate": 1.268474704707073e-07, + "loss": 0.0, + "num_input_tokens_seen": 15930192, + "step": 32360 + }, + { + "epoch": 4.271479477365712, + "grad_norm": 2.55685572483344e-05, + "learning_rate": 1.2662299718097036e-07, + "loss": 0.0747, + "num_input_tokens_seen": 15932368, + "step": 32365 + }, + { + "epoch": 4.272139369143461, + "grad_norm": 0.0030504302121698856, + "learning_rate": 1.2639870925917805e-07, + "loss": 0.0, + "num_input_tokens_seen": 15934928, + "step": 32370 + }, + { + "epoch": 4.272799260921209, + "grad_norm": 0.0010365161579102278, + "learning_rate": 1.2617460675293312e-07, + "loss": 0.0, + "num_input_tokens_seen": 15937232, + "step": 32375 + }, + { + "epoch": 4.273459152698957, + "grad_norm": 0.0027514533139765263, + "learning_rate": 1.259506897098005e-07, + "loss": 0.0, + "num_input_tokens_seen": 15940176, + "step": 32380 + }, + { + "epoch": 4.274119044476706, + "grad_norm": 1.8370121717453003, + "learning_rate": 1.2572695817730473e-07, + "loss": 0.0005, + "num_input_tokens_seen": 15942608, + "step": 32385 + }, + { + "epoch": 4.274778936254454, + "grad_norm": 0.006444776430726051, + "learning_rate": 1.2550341220293059e-07, + "loss": 0.0, + "num_input_tokens_seen": 15945296, + "step": 32390 + }, + { + "epoch": 4.275438828032203, + "grad_norm": 0.00025173244648613036, + "learning_rate": 1.2528005183412503e-07, + "loss": 0.0, + "num_input_tokens_seen": 15947920, + "step": 32395 + }, + { + "epoch": 4.276098719809951, + "grad_norm": 3.405748793738894e-05, + "learning_rate": 1.2505687711829417e-07, + "loss": 0.0, + "num_input_tokens_seen": 15950672, + "step": 32400 + }, + { + "epoch": 4.2767586115877, + "grad_norm": 0.00016917857283260673, + "learning_rate": 1.2483388810280538e-07, + "loss": 0.0016, + "num_input_tokens_seen": 15953552, + "step": 32405 + }, + { + "epoch": 4.277418503365448, + "grad_norm": 0.010513650253415108, + "learning_rate": 1.2461108483498617e-07, + "loss": 0.0007, + "num_input_tokens_seen": 15955920, + "step": 32410 + }, + { + "epoch": 4.278078395143196, + "grad_norm": 0.000138119314215146, + "learning_rate": 1.2438846736212516e-07, + "loss": 0.0, + "num_input_tokens_seen": 15958544, + "step": 32415 + }, + { + "epoch": 4.278738286920945, + "grad_norm": 0.012299539521336555, + "learning_rate": 1.2416603573147155e-07, + "loss": 0.0, + "num_input_tokens_seen": 15961168, + "step": 32420 + }, + { + "epoch": 4.279398178698694, + "grad_norm": 0.00020890981249976903, + "learning_rate": 1.2394378999023426e-07, + "loss": 0.0, + "num_input_tokens_seen": 15963408, + "step": 32425 + }, + { + "epoch": 4.280058070476442, + "grad_norm": 0.00022947814431972802, + "learning_rate": 1.2372173018558373e-07, + "loss": 0.0, + "num_input_tokens_seen": 15966224, + "step": 32430 + }, + { + "epoch": 4.28071796225419, + "grad_norm": 0.0012638174230232835, + "learning_rate": 1.2349985636465054e-07, + "loss": 0.0002, + "num_input_tokens_seen": 15968464, + "step": 32435 + }, + { + "epoch": 4.2813778540319385, + "grad_norm": 0.004594683647155762, + "learning_rate": 1.2327816857452567e-07, + "loss": 0.0, + "num_input_tokens_seen": 15971280, + "step": 32440 + }, + { + "epoch": 4.282037745809687, + "grad_norm": 0.00012326195428613573, + "learning_rate": 1.230566668622607e-07, + "loss": 0.0, + "num_input_tokens_seen": 15973520, + "step": 32445 + }, + { + "epoch": 4.282697637587436, + "grad_norm": 0.0018847265746444464, + "learning_rate": 1.2283535127486789e-07, + "loss": 0.0, + "num_input_tokens_seen": 15976016, + "step": 32450 + }, + { + "epoch": 4.283357529365184, + "grad_norm": 0.09880480915307999, + "learning_rate": 1.2261422185932003e-07, + "loss": 0.0, + "num_input_tokens_seen": 15978320, + "step": 32455 + }, + { + "epoch": 4.2840174211429325, + "grad_norm": 1.9902327039744705e-05, + "learning_rate": 1.223932786625499e-07, + "loss": 0.0, + "num_input_tokens_seen": 15980880, + "step": 32460 + }, + { + "epoch": 4.284677312920681, + "grad_norm": 0.0009088137885555625, + "learning_rate": 1.221725217314512e-07, + "loss": 0.0, + "num_input_tokens_seen": 15983312, + "step": 32465 + }, + { + "epoch": 4.285337204698429, + "grad_norm": 2.24759578704834, + "learning_rate": 1.2195195111287827e-07, + "loss": 0.0017, + "num_input_tokens_seen": 15985872, + "step": 32470 + }, + { + "epoch": 4.285997096476178, + "grad_norm": 0.0002791814331430942, + "learning_rate": 1.2173156685364516e-07, + "loss": 0.0, + "num_input_tokens_seen": 15988304, + "step": 32475 + }, + { + "epoch": 4.2866569882539265, + "grad_norm": 44.14458084106445, + "learning_rate": 1.2151136900052706e-07, + "loss": 0.024, + "num_input_tokens_seen": 15990672, + "step": 32480 + }, + { + "epoch": 4.287316880031675, + "grad_norm": 0.0004078407946508378, + "learning_rate": 1.2129135760025955e-07, + "loss": 0.0008, + "num_input_tokens_seen": 15993040, + "step": 32485 + }, + { + "epoch": 4.287976771809423, + "grad_norm": 5.2265910198912024e-05, + "learning_rate": 1.2107153269953818e-07, + "loss": 0.0007, + "num_input_tokens_seen": 15995792, + "step": 32490 + }, + { + "epoch": 4.288636663587171, + "grad_norm": 0.0001401216140948236, + "learning_rate": 1.208518943450192e-07, + "loss": 0.0, + "num_input_tokens_seen": 15998288, + "step": 32495 + }, + { + "epoch": 4.2892965553649205, + "grad_norm": 0.1450749933719635, + "learning_rate": 1.2063244258331938e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16000912, + "step": 32500 + }, + { + "epoch": 4.289956447142669, + "grad_norm": 0.0006044969195500016, + "learning_rate": 1.2041317746101599e-07, + "loss": 0.0, + "num_input_tokens_seen": 16003088, + "step": 32505 + }, + { + "epoch": 4.290616338920417, + "grad_norm": 1.2627470823645126e-05, + "learning_rate": 1.2019409902464616e-07, + "loss": 0.0, + "num_input_tokens_seen": 16005776, + "step": 32510 + }, + { + "epoch": 4.291276230698165, + "grad_norm": 4.5773995225317776e-05, + "learning_rate": 1.1997520732070742e-07, + "loss": 0.0, + "num_input_tokens_seen": 16008144, + "step": 32515 + }, + { + "epoch": 4.291936122475914, + "grad_norm": 0.004673975054174662, + "learning_rate": 1.197565023956586e-07, + "loss": 0.0, + "num_input_tokens_seen": 16010768, + "step": 32520 + }, + { + "epoch": 4.292596014253663, + "grad_norm": 0.0012522018514573574, + "learning_rate": 1.1953798429591778e-07, + "loss": 0.0, + "num_input_tokens_seen": 16013200, + "step": 32525 + }, + { + "epoch": 4.293255906031411, + "grad_norm": 0.2917592525482178, + "learning_rate": 1.1931965306786396e-07, + "loss": 0.0002, + "num_input_tokens_seen": 16015824, + "step": 32530 + }, + { + "epoch": 4.293915797809159, + "grad_norm": 0.0011305802036076784, + "learning_rate": 1.1910150875783664e-07, + "loss": 0.0, + "num_input_tokens_seen": 16018064, + "step": 32535 + }, + { + "epoch": 4.294575689586908, + "grad_norm": 1.8471331713953987e-05, + "learning_rate": 1.1888355141213491e-07, + "loss": 0.0, + "num_input_tokens_seen": 16020432, + "step": 32540 + }, + { + "epoch": 4.295235581364656, + "grad_norm": 7.753491081530228e-05, + "learning_rate": 1.1866578107701897e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16023056, + "step": 32545 + }, + { + "epoch": 4.295895473142405, + "grad_norm": 0.00025911873672157526, + "learning_rate": 1.1844819779870862e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16025360, + "step": 32550 + }, + { + "epoch": 4.296555364920153, + "grad_norm": 0.0005726946983486414, + "learning_rate": 1.1823080162338483e-07, + "loss": 0.0, + "num_input_tokens_seen": 16027920, + "step": 32555 + }, + { + "epoch": 4.297215256697902, + "grad_norm": 0.017819080501794815, + "learning_rate": 1.1801359259718823e-07, + "loss": 0.0, + "num_input_tokens_seen": 16030416, + "step": 32560 + }, + { + "epoch": 4.29787514847565, + "grad_norm": 0.0017297941958531737, + "learning_rate": 1.1779657076621951e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16032784, + "step": 32565 + }, + { + "epoch": 4.298535040253398, + "grad_norm": 0.0005568054039031267, + "learning_rate": 1.1757973617654027e-07, + "loss": 0.0, + "num_input_tokens_seen": 16035216, + "step": 32570 + }, + { + "epoch": 4.299194932031147, + "grad_norm": 0.006575642619282007, + "learning_rate": 1.1736308887417201e-07, + "loss": 0.0, + "num_input_tokens_seen": 16037584, + "step": 32575 + }, + { + "epoch": 4.299854823808896, + "grad_norm": 1.1569028174562845e-05, + "learning_rate": 1.1714662890509685e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16040016, + "step": 32580 + }, + { + "epoch": 4.300514715586644, + "grad_norm": 7.299587014131248e-05, + "learning_rate": 1.1693035631525628e-07, + "loss": 0.0, + "num_input_tokens_seen": 16042640, + "step": 32585 + }, + { + "epoch": 4.301174607364392, + "grad_norm": 0.0003289075684733689, + "learning_rate": 1.1671427115055299e-07, + "loss": 0.0, + "num_input_tokens_seen": 16045136, + "step": 32590 + }, + { + "epoch": 4.3018344991421404, + "grad_norm": 1.6217174561461434e-05, + "learning_rate": 1.1649837345684954e-07, + "loss": 0.0006, + "num_input_tokens_seen": 16047696, + "step": 32595 + }, + { + "epoch": 4.302494390919889, + "grad_norm": 2.654941454238724e-05, + "learning_rate": 1.1628266327996827e-07, + "loss": 0.0004, + "num_input_tokens_seen": 16050000, + "step": 32600 + }, + { + "epoch": 4.303154282697638, + "grad_norm": 0.0025819791480898857, + "learning_rate": 1.1606714066569235e-07, + "loss": 0.0003, + "num_input_tokens_seen": 16052624, + "step": 32605 + }, + { + "epoch": 4.303814174475386, + "grad_norm": 22.277074813842773, + "learning_rate": 1.1585180565976515e-07, + "loss": 0.0361, + "num_input_tokens_seen": 16054864, + "step": 32610 + }, + { + "epoch": 4.3044740662531344, + "grad_norm": 0.00020241711172275245, + "learning_rate": 1.1563665830788948e-07, + "loss": 0.0, + "num_input_tokens_seen": 16057104, + "step": 32615 + }, + { + "epoch": 4.305133958030883, + "grad_norm": 0.00014118028047960252, + "learning_rate": 1.1542169865572904e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16059472, + "step": 32620 + }, + { + "epoch": 4.305793849808631, + "grad_norm": 1.2410049748723395e-05, + "learning_rate": 1.1520692674890741e-07, + "loss": 0.0002, + "num_input_tokens_seen": 16061712, + "step": 32625 + }, + { + "epoch": 4.30645374158638, + "grad_norm": 5.3214229410514235e-05, + "learning_rate": 1.149923426330086e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16064016, + "step": 32630 + }, + { + "epoch": 4.3071136333641284, + "grad_norm": 2.0495712306001224e-05, + "learning_rate": 1.1477794635357618e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16066192, + "step": 32635 + }, + { + "epoch": 4.307773525141877, + "grad_norm": 0.00010133234172826633, + "learning_rate": 1.145637379561144e-07, + "loss": 0.0, + "num_input_tokens_seen": 16068368, + "step": 32640 + }, + { + "epoch": 4.308433416919625, + "grad_norm": 2.9838472983101383e-05, + "learning_rate": 1.1434971748608757e-07, + "loss": 0.0002, + "num_input_tokens_seen": 16070416, + "step": 32645 + }, + { + "epoch": 4.309093308697373, + "grad_norm": 0.0007358550792559981, + "learning_rate": 1.1413588498891957e-07, + "loss": 0.0, + "num_input_tokens_seen": 16072784, + "step": 32650 + }, + { + "epoch": 4.3097532004751224, + "grad_norm": 0.09885164350271225, + "learning_rate": 1.139222405099951e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16075280, + "step": 32655 + }, + { + "epoch": 4.310413092252871, + "grad_norm": 0.0009102729964070022, + "learning_rate": 1.137087840946589e-07, + "loss": 0.0, + "num_input_tokens_seen": 16078032, + "step": 32660 + }, + { + "epoch": 4.311072984030619, + "grad_norm": 0.000612208095844835, + "learning_rate": 1.1349551578821493e-07, + "loss": 0.0133, + "num_input_tokens_seen": 16080464, + "step": 32665 + }, + { + "epoch": 4.311732875808367, + "grad_norm": 3.099453169852495e-05, + "learning_rate": 1.1328243563592831e-07, + "loss": 0.0, + "num_input_tokens_seen": 16082960, + "step": 32670 + }, + { + "epoch": 4.312392767586116, + "grad_norm": 3.5384764487389475e-05, + "learning_rate": 1.1306954368302357e-07, + "loss": 0.0, + "num_input_tokens_seen": 16085456, + "step": 32675 + }, + { + "epoch": 4.313052659363865, + "grad_norm": 2.569669231888838e-05, + "learning_rate": 1.1285683997468564e-07, + "loss": 0.0015, + "num_input_tokens_seen": 16087504, + "step": 32680 + }, + { + "epoch": 4.313712551141613, + "grad_norm": 0.0005885774153284729, + "learning_rate": 1.1264432455605933e-07, + "loss": 0.0, + "num_input_tokens_seen": 16089936, + "step": 32685 + }, + { + "epoch": 4.314372442919361, + "grad_norm": 4.543912291410379e-05, + "learning_rate": 1.1243199747224897e-07, + "loss": 0.0, + "num_input_tokens_seen": 16092112, + "step": 32690 + }, + { + "epoch": 4.31503233469711, + "grad_norm": 0.0004520398215390742, + "learning_rate": 1.122198587683203e-07, + "loss": 0.0, + "num_input_tokens_seen": 16094544, + "step": 32695 + }, + { + "epoch": 4.315692226474858, + "grad_norm": 1.6724603483453393e-05, + "learning_rate": 1.1200790848929764e-07, + "loss": 0.0239, + "num_input_tokens_seen": 16096848, + "step": 32700 + }, + { + "epoch": 4.316352118252606, + "grad_norm": 0.0003861555305775255, + "learning_rate": 1.1179614668016624e-07, + "loss": 0.0, + "num_input_tokens_seen": 16099024, + "step": 32705 + }, + { + "epoch": 4.317012010030355, + "grad_norm": 0.20465129613876343, + "learning_rate": 1.1158457338587047e-07, + "loss": 0.0144, + "num_input_tokens_seen": 16101776, + "step": 32710 + }, + { + "epoch": 4.317671901808104, + "grad_norm": 3.3590320526855066e-05, + "learning_rate": 1.1137318865131595e-07, + "loss": 0.0, + "num_input_tokens_seen": 16104144, + "step": 32715 + }, + { + "epoch": 4.318331793585852, + "grad_norm": 0.000302224128972739, + "learning_rate": 1.1116199252136727e-07, + "loss": 0.0, + "num_input_tokens_seen": 16106512, + "step": 32720 + }, + { + "epoch": 4.3189916853636, + "grad_norm": 0.12201271951198578, + "learning_rate": 1.1095098504084877e-07, + "loss": 0.0427, + "num_input_tokens_seen": 16108944, + "step": 32725 + }, + { + "epoch": 4.319651577141348, + "grad_norm": 0.04804328456521034, + "learning_rate": 1.1074016625454607e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16111312, + "step": 32730 + }, + { + "epoch": 4.320311468919098, + "grad_norm": 0.00011896403884747997, + "learning_rate": 1.1052953620720351e-07, + "loss": 0.028, + "num_input_tokens_seen": 16114000, + "step": 32735 + }, + { + "epoch": 4.320971360696846, + "grad_norm": 4.947075649397448e-05, + "learning_rate": 1.1031909494352588e-07, + "loss": 0.0322, + "num_input_tokens_seen": 16116112, + "step": 32740 + }, + { + "epoch": 4.321631252474594, + "grad_norm": 0.0005829980946145952, + "learning_rate": 1.1010884250817765e-07, + "loss": 0.0533, + "num_input_tokens_seen": 16118544, + "step": 32745 + }, + { + "epoch": 4.322291144252342, + "grad_norm": 5.172559031052515e-05, + "learning_rate": 1.098987789457836e-07, + "loss": 0.0, + "num_input_tokens_seen": 16120976, + "step": 32750 + }, + { + "epoch": 4.322951036030091, + "grad_norm": 4.1301213059341535e-05, + "learning_rate": 1.0968890430092825e-07, + "loss": 0.0096, + "num_input_tokens_seen": 16123600, + "step": 32755 + }, + { + "epoch": 4.32361092780784, + "grad_norm": 0.0037700431421399117, + "learning_rate": 1.0947921861815557e-07, + "loss": 0.0, + "num_input_tokens_seen": 16125840, + "step": 32760 + }, + { + "epoch": 4.324270819585588, + "grad_norm": 0.0031601234804838896, + "learning_rate": 1.0926972194197015e-07, + "loss": 0.0518, + "num_input_tokens_seen": 16128336, + "step": 32765 + }, + { + "epoch": 4.324930711363336, + "grad_norm": 0.00015431219071615487, + "learning_rate": 1.0906041431683632e-07, + "loss": 0.0, + "num_input_tokens_seen": 16131024, + "step": 32770 + }, + { + "epoch": 4.325590603141085, + "grad_norm": 0.0005814318428747356, + "learning_rate": 1.0885129578717767e-07, + "loss": 0.0, + "num_input_tokens_seen": 16133712, + "step": 32775 + }, + { + "epoch": 4.326250494918833, + "grad_norm": 2.0374101950437762e-05, + "learning_rate": 1.0864236639737823e-07, + "loss": 0.0, + "num_input_tokens_seen": 16136208, + "step": 32780 + }, + { + "epoch": 4.326910386696582, + "grad_norm": 3.942536568501964e-05, + "learning_rate": 1.0843362619178187e-07, + "loss": 0.0, + "num_input_tokens_seen": 16138576, + "step": 32785 + }, + { + "epoch": 4.32757027847433, + "grad_norm": 5.525383472442627, + "learning_rate": 1.0822507521469227e-07, + "loss": 0.0014, + "num_input_tokens_seen": 16141072, + "step": 32790 + }, + { + "epoch": 4.328230170252079, + "grad_norm": 6.413905066438019e-05, + "learning_rate": 1.0801671351037255e-07, + "loss": 0.0, + "num_input_tokens_seen": 16143632, + "step": 32795 + }, + { + "epoch": 4.328890062029827, + "grad_norm": 0.00015124822675716132, + "learning_rate": 1.0780854112304626e-07, + "loss": 0.0018, + "num_input_tokens_seen": 16146320, + "step": 32800 + }, + { + "epoch": 4.329549953807575, + "grad_norm": 0.0004587690345942974, + "learning_rate": 1.076005580968965e-07, + "loss": 0.0, + "num_input_tokens_seen": 16149008, + "step": 32805 + }, + { + "epoch": 4.330209845585324, + "grad_norm": 0.0013646406587213278, + "learning_rate": 1.0739276447606582e-07, + "loss": 0.0, + "num_input_tokens_seen": 16151504, + "step": 32810 + }, + { + "epoch": 4.330869737363073, + "grad_norm": 8.358648483408615e-05, + "learning_rate": 1.0718516030465708e-07, + "loss": 0.0, + "num_input_tokens_seen": 16154320, + "step": 32815 + }, + { + "epoch": 4.331529629140821, + "grad_norm": 0.0003338223323225975, + "learning_rate": 1.0697774562673312e-07, + "loss": 0.0, + "num_input_tokens_seen": 16156816, + "step": 32820 + }, + { + "epoch": 4.332189520918569, + "grad_norm": 0.0018141282489523292, + "learning_rate": 1.0677052048631563e-07, + "loss": 0.0, + "num_input_tokens_seen": 16158992, + "step": 32825 + }, + { + "epoch": 4.3328494126963175, + "grad_norm": 3.3614989661145955e-05, + "learning_rate": 1.0656348492738687e-07, + "loss": 0.0, + "num_input_tokens_seen": 16161296, + "step": 32830 + }, + { + "epoch": 4.333509304474067, + "grad_norm": 9.122475603362545e-05, + "learning_rate": 1.0635663899388881e-07, + "loss": 0.0, + "num_input_tokens_seen": 16163664, + "step": 32835 + }, + { + "epoch": 4.334169196251815, + "grad_norm": 6.432763620978221e-05, + "learning_rate": 1.0614998272972298e-07, + "loss": 0.0533, + "num_input_tokens_seen": 16165840, + "step": 32840 + }, + { + "epoch": 4.334829088029563, + "grad_norm": 2.337733531021513e-05, + "learning_rate": 1.0594351617875053e-07, + "loss": 0.0683, + "num_input_tokens_seen": 16168208, + "step": 32845 + }, + { + "epoch": 4.3354889798073115, + "grad_norm": 4.805472417501733e-05, + "learning_rate": 1.0573723938479217e-07, + "loss": 0.0, + "num_input_tokens_seen": 16170640, + "step": 32850 + }, + { + "epoch": 4.33614887158506, + "grad_norm": 0.0001322894386248663, + "learning_rate": 1.0553115239162935e-07, + "loss": 0.0, + "num_input_tokens_seen": 16172880, + "step": 32855 + }, + { + "epoch": 4.336808763362809, + "grad_norm": 1.8390241166343912e-05, + "learning_rate": 1.0532525524300206e-07, + "loss": 0.0, + "num_input_tokens_seen": 16175248, + "step": 32860 + }, + { + "epoch": 4.337468655140557, + "grad_norm": 6.988491804804653e-05, + "learning_rate": 1.0511954798261058e-07, + "loss": 0.0, + "num_input_tokens_seen": 16177680, + "step": 32865 + }, + { + "epoch": 4.3381285469183055, + "grad_norm": 0.0004975342308171093, + "learning_rate": 1.0491403065411508e-07, + "loss": 0.0472, + "num_input_tokens_seen": 16180048, + "step": 32870 + }, + { + "epoch": 4.338788438696054, + "grad_norm": 0.001267925021238625, + "learning_rate": 1.0470870330113457e-07, + "loss": 0.0, + "num_input_tokens_seen": 16182416, + "step": 32875 + }, + { + "epoch": 4.339448330473802, + "grad_norm": 0.0012009447673335671, + "learning_rate": 1.0450356596724886e-07, + "loss": 0.1348, + "num_input_tokens_seen": 16184848, + "step": 32880 + }, + { + "epoch": 4.34010822225155, + "grad_norm": 2.034113094850909e-05, + "learning_rate": 1.0429861869599622e-07, + "loss": 0.0, + "num_input_tokens_seen": 16187280, + "step": 32885 + }, + { + "epoch": 4.3407681140292995, + "grad_norm": 1.8013641238212585e-05, + "learning_rate": 1.0409386153087596e-07, + "loss": 0.0004, + "num_input_tokens_seen": 16189584, + "step": 32890 + }, + { + "epoch": 4.341428005807048, + "grad_norm": 0.0014492205809801817, + "learning_rate": 1.0388929451534601e-07, + "loss": 0.0, + "num_input_tokens_seen": 16191760, + "step": 32895 + }, + { + "epoch": 4.342087897584796, + "grad_norm": 0.0023908542934805155, + "learning_rate": 1.0368491769282395e-07, + "loss": 0.0, + "num_input_tokens_seen": 16194128, + "step": 32900 + }, + { + "epoch": 4.342747789362544, + "grad_norm": 8.186150080291554e-05, + "learning_rate": 1.0348073110668743e-07, + "loss": 0.0, + "num_input_tokens_seen": 16196752, + "step": 32905 + }, + { + "epoch": 4.343407681140293, + "grad_norm": 0.0023419694043695927, + "learning_rate": 1.0327673480027377e-07, + "loss": 0.0, + "num_input_tokens_seen": 16199248, + "step": 32910 + }, + { + "epoch": 4.344067572918042, + "grad_norm": 0.0005303608486428857, + "learning_rate": 1.0307292881687968e-07, + "loss": 0.0002, + "num_input_tokens_seen": 16201808, + "step": 32915 + }, + { + "epoch": 4.34472746469579, + "grad_norm": 0.1571546196937561, + "learning_rate": 1.0286931319976133e-07, + "loss": 0.0, + "num_input_tokens_seen": 16204304, + "step": 32920 + }, + { + "epoch": 4.345387356473538, + "grad_norm": 4.8940040869638324e-05, + "learning_rate": 1.026658879921346e-07, + "loss": 0.0, + "num_input_tokens_seen": 16206864, + "step": 32925 + }, + { + "epoch": 4.346047248251287, + "grad_norm": 6.440455436706543, + "learning_rate": 1.024626532371755e-07, + "loss": 0.0061, + "num_input_tokens_seen": 16209104, + "step": 32930 + }, + { + "epoch": 4.346707140029035, + "grad_norm": 0.00065460434416309, + "learning_rate": 1.0225960897801856e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16211536, + "step": 32935 + }, + { + "epoch": 4.347367031806784, + "grad_norm": 4.881566565018147e-05, + "learning_rate": 1.0205675525775858e-07, + "loss": 0.0, + "num_input_tokens_seen": 16213840, + "step": 32940 + }, + { + "epoch": 4.348026923584532, + "grad_norm": 0.008005255833268166, + "learning_rate": 1.0185409211945017e-07, + "loss": 0.0, + "num_input_tokens_seen": 16216144, + "step": 32945 + }, + { + "epoch": 4.348686815362281, + "grad_norm": 0.00028780216234736145, + "learning_rate": 1.0165161960610669e-07, + "loss": 0.0, + "num_input_tokens_seen": 16218512, + "step": 32950 + }, + { + "epoch": 4.349346707140029, + "grad_norm": 6.93507754476741e-05, + "learning_rate": 1.0144933776070163e-07, + "loss": 0.0, + "num_input_tokens_seen": 16221200, + "step": 32955 + }, + { + "epoch": 4.350006598917777, + "grad_norm": 1.4966816706873942e-05, + "learning_rate": 1.012472466261678e-07, + "loss": 0.0, + "num_input_tokens_seen": 16223632, + "step": 32960 + }, + { + "epoch": 4.3506664906955255, + "grad_norm": 0.0011455845087766647, + "learning_rate": 1.0104534624539785e-07, + "loss": 0.001, + "num_input_tokens_seen": 16226192, + "step": 32965 + }, + { + "epoch": 4.351326382473275, + "grad_norm": 0.00605833949521184, + "learning_rate": 1.0084363666124318e-07, + "loss": 0.0002, + "num_input_tokens_seen": 16228432, + "step": 32970 + }, + { + "epoch": 4.351986274251023, + "grad_norm": 0.0009054954862222075, + "learning_rate": 1.0064211791651544e-07, + "loss": 0.0, + "num_input_tokens_seen": 16230736, + "step": 32975 + }, + { + "epoch": 4.352646166028771, + "grad_norm": 1.4958550309529528e-05, + "learning_rate": 1.0044079005398576e-07, + "loss": 0.0001, + "num_input_tokens_seen": 16232976, + "step": 32980 + }, + { + "epoch": 4.3533060578065195, + "grad_norm": 2.5793897293624468e-05, + "learning_rate": 1.0023965311638415e-07, + "loss": 0.0, + "num_input_tokens_seen": 16235408, + "step": 32985 + }, + { + "epoch": 4.353965949584268, + "grad_norm": 5.313528163242154e-05, + "learning_rate": 1.0003870714640061e-07, + "loss": 0.0, + "num_input_tokens_seen": 16238032, + "step": 32990 + }, + { + "epoch": 4.354625841362017, + "grad_norm": 3.9087779441615567e-05, + "learning_rate": 9.983795218668456e-08, + "loss": 0.002, + "num_input_tokens_seen": 16240976, + "step": 32995 + }, + { + "epoch": 4.355285733139765, + "grad_norm": 3.251605812693015e-05, + "learning_rate": 9.963738827984458e-08, + "loss": 0.0384, + "num_input_tokens_seen": 16243088, + "step": 33000 + }, + { + "epoch": 4.3559456249175135, + "grad_norm": 0.00026064313715323806, + "learning_rate": 9.943701546844906e-08, + "loss": 0.0, + "num_input_tokens_seen": 16245520, + "step": 33005 + }, + { + "epoch": 4.356605516695262, + "grad_norm": 0.02836507558822632, + "learning_rate": 9.923683379502557e-08, + "loss": 0.0337, + "num_input_tokens_seen": 16248016, + "step": 33010 + }, + { + "epoch": 4.35726540847301, + "grad_norm": 2.0125011360505596e-05, + "learning_rate": 9.903684330206152e-08, + "loss": 0.0, + "num_input_tokens_seen": 16250320, + "step": 33015 + }, + { + "epoch": 4.357925300250759, + "grad_norm": 0.0007408508099615574, + "learning_rate": 9.8837044032003e-08, + "loss": 0.0, + "num_input_tokens_seen": 16253072, + "step": 33020 + }, + { + "epoch": 4.3585851920285075, + "grad_norm": 0.0003739091625902802, + "learning_rate": 9.863743602725627e-08, + "loss": 0.0, + "num_input_tokens_seen": 16255696, + "step": 33025 + }, + { + "epoch": 4.359245083806256, + "grad_norm": 2.6183059162576683e-05, + "learning_rate": 9.843801933018669e-08, + "loss": 0.0, + "num_input_tokens_seen": 16258256, + "step": 33030 + }, + { + "epoch": 4.359904975584004, + "grad_norm": 0.07573069632053375, + "learning_rate": 9.823879398311874e-08, + "loss": 0.0, + "num_input_tokens_seen": 16260752, + "step": 33035 + }, + { + "epoch": 4.360564867361752, + "grad_norm": 7.156374340411276e-05, + "learning_rate": 9.803976002833692e-08, + "loss": 0.0226, + "num_input_tokens_seen": 16263440, + "step": 33040 + }, + { + "epoch": 4.3612247591395015, + "grad_norm": 0.00026338372845202684, + "learning_rate": 9.78409175080841e-08, + "loss": 0.0009, + "num_input_tokens_seen": 16266000, + "step": 33045 + }, + { + "epoch": 4.36188465091725, + "grad_norm": 7.694336090935394e-05, + "learning_rate": 9.764226646456408e-08, + "loss": 0.0, + "num_input_tokens_seen": 16268624, + "step": 33050 + }, + { + "epoch": 4.362544542694998, + "grad_norm": 2.3234377295011654e-05, + "learning_rate": 9.744380693993858e-08, + "loss": 0.0, + "num_input_tokens_seen": 16270992, + "step": 33055 + }, + { + "epoch": 4.363204434472746, + "grad_norm": 0.00016954565944615752, + "learning_rate": 9.724553897632893e-08, + "loss": 0.0, + "num_input_tokens_seen": 16273424, + "step": 33060 + }, + { + "epoch": 4.363864326250495, + "grad_norm": 2.0216995835653506e-05, + "learning_rate": 9.704746261581675e-08, + "loss": 0.0441, + "num_input_tokens_seen": 16275728, + "step": 33065 + }, + { + "epoch": 4.364524218028244, + "grad_norm": 0.00014168783673085272, + "learning_rate": 9.684957790044179e-08, + "loss": 0.0, + "num_input_tokens_seen": 16277904, + "step": 33070 + }, + { + "epoch": 4.365184109805992, + "grad_norm": 0.0001042889998643659, + "learning_rate": 9.665188487220399e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16280720, + "step": 33075 + }, + { + "epoch": 4.36584400158374, + "grad_norm": 6.27780391369015e-05, + "learning_rate": 9.64543835730619e-08, + "loss": 0.0, + "num_input_tokens_seen": 16283088, + "step": 33080 + }, + { + "epoch": 4.366503893361489, + "grad_norm": 2.529203447920736e-05, + "learning_rate": 9.625707404493399e-08, + "loss": 0.0, + "num_input_tokens_seen": 16285520, + "step": 33085 + }, + { + "epoch": 4.367163785139237, + "grad_norm": 0.00032823492074385285, + "learning_rate": 9.605995632969787e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16287888, + "step": 33090 + }, + { + "epoch": 4.367823676916986, + "grad_norm": 19.19044303894043, + "learning_rate": 9.586303046919008e-08, + "loss": 0.0226, + "num_input_tokens_seen": 16290256, + "step": 33095 + }, + { + "epoch": 4.368483568694734, + "grad_norm": 0.0008443885017186403, + "learning_rate": 9.566629650520675e-08, + "loss": 0.0, + "num_input_tokens_seen": 16292496, + "step": 33100 + }, + { + "epoch": 4.369143460472483, + "grad_norm": 0.19148346781730652, + "learning_rate": 9.546975447950345e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16294864, + "step": 33105 + }, + { + "epoch": 4.369803352250231, + "grad_norm": 0.0007681222632527351, + "learning_rate": 9.527340443379461e-08, + "loss": 0.0, + "num_input_tokens_seen": 16297616, + "step": 33110 + }, + { + "epoch": 4.370463244027979, + "grad_norm": 0.011789199896156788, + "learning_rate": 9.507724640975412e-08, + "loss": 0.0, + "num_input_tokens_seen": 16300048, + "step": 33115 + }, + { + "epoch": 4.371123135805728, + "grad_norm": 0.00011409088619984686, + "learning_rate": 9.488128044901511e-08, + "loss": 0.0, + "num_input_tokens_seen": 16302608, + "step": 33120 + }, + { + "epoch": 4.371783027583477, + "grad_norm": 0.00021923432359471917, + "learning_rate": 9.468550659317009e-08, + "loss": 0.0715, + "num_input_tokens_seen": 16305232, + "step": 33125 + }, + { + "epoch": 4.372442919361225, + "grad_norm": 0.00023443755344487727, + "learning_rate": 9.44899248837705e-08, + "loss": 0.028, + "num_input_tokens_seen": 16307536, + "step": 33130 + }, + { + "epoch": 4.373102811138973, + "grad_norm": 1.5468593119294383e-05, + "learning_rate": 9.4294535362327e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16310160, + "step": 33135 + }, + { + "epoch": 4.373762702916721, + "grad_norm": 1.1573849405976944e-05, + "learning_rate": 9.409933807031012e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16312976, + "step": 33140 + }, + { + "epoch": 4.37442259469447, + "grad_norm": 2.0115514416829683e-05, + "learning_rate": 9.390433304914846e-08, + "loss": 0.0, + "num_input_tokens_seen": 16315216, + "step": 33145 + }, + { + "epoch": 4.375082486472219, + "grad_norm": 0.0004016650200355798, + "learning_rate": 9.370952034023061e-08, + "loss": 0.0502, + "num_input_tokens_seen": 16317584, + "step": 33150 + }, + { + "epoch": 4.375742378249967, + "grad_norm": 0.019395913928747177, + "learning_rate": 9.351489998490447e-08, + "loss": 0.0, + "num_input_tokens_seen": 16319952, + "step": 33155 + }, + { + "epoch": 4.376402270027715, + "grad_norm": 2.075076918117702e-05, + "learning_rate": 9.332047202447635e-08, + "loss": 0.0, + "num_input_tokens_seen": 16322576, + "step": 33160 + }, + { + "epoch": 4.377062161805464, + "grad_norm": 0.0009933270048350096, + "learning_rate": 9.312623650021245e-08, + "loss": 0.0, + "num_input_tokens_seen": 16325328, + "step": 33165 + }, + { + "epoch": 4.377722053583212, + "grad_norm": 0.04395154118537903, + "learning_rate": 9.29321934533378e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16327568, + "step": 33170 + }, + { + "epoch": 4.378381945360961, + "grad_norm": 0.003907402511686087, + "learning_rate": 9.273834292503668e-08, + "loss": 0.0, + "num_input_tokens_seen": 16330384, + "step": 33175 + }, + { + "epoch": 4.379041837138709, + "grad_norm": 0.04484650120139122, + "learning_rate": 9.254468495645251e-08, + "loss": 0.0, + "num_input_tokens_seen": 16332624, + "step": 33180 + }, + { + "epoch": 4.379701728916458, + "grad_norm": 6.542204937431961e-05, + "learning_rate": 9.235121958868731e-08, + "loss": 0.0, + "num_input_tokens_seen": 16334928, + "step": 33185 + }, + { + "epoch": 4.380361620694206, + "grad_norm": 6.131920963525772e-05, + "learning_rate": 9.215794686280343e-08, + "loss": 0.0004, + "num_input_tokens_seen": 16337552, + "step": 33190 + }, + { + "epoch": 4.381021512471954, + "grad_norm": 0.003037465503439307, + "learning_rate": 9.196486681982096e-08, + "loss": 0.0, + "num_input_tokens_seen": 16340112, + "step": 33195 + }, + { + "epoch": 4.381681404249703, + "grad_norm": 0.16938516497612, + "learning_rate": 9.177197950072012e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16342416, + "step": 33200 + }, + { + "epoch": 4.382341296027452, + "grad_norm": 0.004777638241648674, + "learning_rate": 9.157928494644007e-08, + "loss": 0.0366, + "num_input_tokens_seen": 16344912, + "step": 33205 + }, + { + "epoch": 4.3830011878052, + "grad_norm": 5.403992690844461e-05, + "learning_rate": 9.138678319787818e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16347728, + "step": 33210 + }, + { + "epoch": 4.383661079582948, + "grad_norm": 0.0020157424733042717, + "learning_rate": 9.119447429589212e-08, + "loss": 0.0, + "num_input_tokens_seen": 16350352, + "step": 33215 + }, + { + "epoch": 4.3843209713606965, + "grad_norm": 0.008629385381937027, + "learning_rate": 9.100235828129743e-08, + "loss": 0.0, + "num_input_tokens_seen": 16352784, + "step": 33220 + }, + { + "epoch": 4.384980863138446, + "grad_norm": 0.0016030854312703013, + "learning_rate": 9.08104351948702e-08, + "loss": 0.0066, + "num_input_tokens_seen": 16355344, + "step": 33225 + }, + { + "epoch": 4.385640754916194, + "grad_norm": 0.0016935844905674458, + "learning_rate": 9.061870507734426e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16357712, + "step": 33230 + }, + { + "epoch": 4.386300646693942, + "grad_norm": 0.00036475996603257954, + "learning_rate": 9.042716796941275e-08, + "loss": 0.0, + "num_input_tokens_seen": 16360144, + "step": 33235 + }, + { + "epoch": 4.3869605384716905, + "grad_norm": 0.0006020345608703792, + "learning_rate": 9.023582391172813e-08, + "loss": 0.0, + "num_input_tokens_seen": 16362576, + "step": 33240 + }, + { + "epoch": 4.387620430249439, + "grad_norm": 5.8977642765967175e-05, + "learning_rate": 9.004467294490203e-08, + "loss": 0.0, + "num_input_tokens_seen": 16365072, + "step": 33245 + }, + { + "epoch": 4.388280322027187, + "grad_norm": 4.70478662464302e-05, + "learning_rate": 8.98537151095048e-08, + "loss": 0.0, + "num_input_tokens_seen": 16367568, + "step": 33250 + }, + { + "epoch": 4.388940213804936, + "grad_norm": 0.0005329661653377116, + "learning_rate": 8.966295044606565e-08, + "loss": 0.0, + "num_input_tokens_seen": 16370128, + "step": 33255 + }, + { + "epoch": 4.3896001055826845, + "grad_norm": 2.0265884813852608e-05, + "learning_rate": 8.94723789950731e-08, + "loss": 0.0, + "num_input_tokens_seen": 16372688, + "step": 33260 + }, + { + "epoch": 4.390259997360433, + "grad_norm": 3.663907409645617e-05, + "learning_rate": 8.928200079697479e-08, + "loss": 0.0, + "num_input_tokens_seen": 16375120, + "step": 33265 + }, + { + "epoch": 4.390919889138181, + "grad_norm": 1.8534060716629028, + "learning_rate": 8.909181589217674e-08, + "loss": 0.0006, + "num_input_tokens_seen": 16377616, + "step": 33270 + }, + { + "epoch": 4.391579780915929, + "grad_norm": 0.001200351631268859, + "learning_rate": 8.890182432104443e-08, + "loss": 0.0, + "num_input_tokens_seen": 16380112, + "step": 33275 + }, + { + "epoch": 4.3922396726936785, + "grad_norm": 1.7528962416690774e-05, + "learning_rate": 8.871202612390249e-08, + "loss": 0.0, + "num_input_tokens_seen": 16382544, + "step": 33280 + }, + { + "epoch": 4.392899564471427, + "grad_norm": 2.8107933758292347e-05, + "learning_rate": 8.852242134103383e-08, + "loss": 0.0, + "num_input_tokens_seen": 16385104, + "step": 33285 + }, + { + "epoch": 4.393559456249175, + "grad_norm": 0.0058558168821036816, + "learning_rate": 8.833301001268078e-08, + "loss": 0.0, + "num_input_tokens_seen": 16387536, + "step": 33290 + }, + { + "epoch": 4.394219348026923, + "grad_norm": 24.756563186645508, + "learning_rate": 8.814379217904455e-08, + "loss": 0.0188, + "num_input_tokens_seen": 16389840, + "step": 33295 + }, + { + "epoch": 4.394879239804672, + "grad_norm": 1.8636386812431738e-05, + "learning_rate": 8.795476788028555e-08, + "loss": 0.0153, + "num_input_tokens_seen": 16392080, + "step": 33300 + }, + { + "epoch": 4.395539131582421, + "grad_norm": 0.0011585361789911985, + "learning_rate": 8.776593715652226e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16394384, + "step": 33305 + }, + { + "epoch": 4.396199023360169, + "grad_norm": 2.210846185684204, + "learning_rate": 8.757730004783303e-08, + "loss": 0.002, + "num_input_tokens_seen": 16397072, + "step": 33310 + }, + { + "epoch": 4.396858915137917, + "grad_norm": 0.018569767475128174, + "learning_rate": 8.738885659425477e-08, + "loss": 0.0626, + "num_input_tokens_seen": 16399696, + "step": 33315 + }, + { + "epoch": 4.397518806915666, + "grad_norm": 1.4774296687392052e-05, + "learning_rate": 8.72006068357829e-08, + "loss": 0.0, + "num_input_tokens_seen": 16402256, + "step": 33320 + }, + { + "epoch": 4.398178698693414, + "grad_norm": 0.1812116652727127, + "learning_rate": 8.701255081237225e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16404944, + "step": 33325 + }, + { + "epoch": 4.398838590471163, + "grad_norm": 0.00019690478802658617, + "learning_rate": 8.682468856393654e-08, + "loss": 0.0, + "num_input_tokens_seen": 16407248, + "step": 33330 + }, + { + "epoch": 4.399498482248911, + "grad_norm": 5.9989270084770396e-05, + "learning_rate": 8.66370201303478e-08, + "loss": 0.0, + "num_input_tokens_seen": 16409424, + "step": 33335 + }, + { + "epoch": 4.40015837402666, + "grad_norm": 0.08432416617870331, + "learning_rate": 8.644954555143757e-08, + "loss": 0.0004, + "num_input_tokens_seen": 16412048, + "step": 33340 + }, + { + "epoch": 4.400818265804408, + "grad_norm": 0.000751759042032063, + "learning_rate": 8.626226486699573e-08, + "loss": 0.0002, + "num_input_tokens_seen": 16414736, + "step": 33345 + }, + { + "epoch": 4.401478157582156, + "grad_norm": 3.477888094494119e-05, + "learning_rate": 8.607517811677168e-08, + "loss": 0.0, + "num_input_tokens_seen": 16417232, + "step": 33350 + }, + { + "epoch": 4.402138049359905, + "grad_norm": 0.002841503359377384, + "learning_rate": 8.588828534047276e-08, + "loss": 0.0188, + "num_input_tokens_seen": 16419728, + "step": 33355 + }, + { + "epoch": 4.402797941137654, + "grad_norm": 0.0006798732210882008, + "learning_rate": 8.570158657776582e-08, + "loss": 0.0, + "num_input_tokens_seen": 16422288, + "step": 33360 + }, + { + "epoch": 4.403457832915402, + "grad_norm": 0.007670256774872541, + "learning_rate": 8.551508186827639e-08, + "loss": 0.0782, + "num_input_tokens_seen": 16424784, + "step": 33365 + }, + { + "epoch": 4.40411772469315, + "grad_norm": 0.00018999635358341038, + "learning_rate": 8.532877125158854e-08, + "loss": 0.0, + "num_input_tokens_seen": 16427280, + "step": 33370 + }, + { + "epoch": 4.4047776164708985, + "grad_norm": 0.0015463822055608034, + "learning_rate": 8.514265476724547e-08, + "loss": 0.0366, + "num_input_tokens_seen": 16429840, + "step": 33375 + }, + { + "epoch": 4.405437508248648, + "grad_norm": 5.6568584113847464e-05, + "learning_rate": 8.49567324547491e-08, + "loss": 0.0, + "num_input_tokens_seen": 16432208, + "step": 33380 + }, + { + "epoch": 4.406097400026396, + "grad_norm": 0.009160442277789116, + "learning_rate": 8.47710043535601e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16434960, + "step": 33385 + }, + { + "epoch": 4.406757291804144, + "grad_norm": 2.5621367967687547e-05, + "learning_rate": 8.458547050309794e-08, + "loss": 0.0, + "num_input_tokens_seen": 16437584, + "step": 33390 + }, + { + "epoch": 4.4074171835818925, + "grad_norm": 2.901063453464303e-05, + "learning_rate": 8.440013094274035e-08, + "loss": 0.0, + "num_input_tokens_seen": 16440144, + "step": 33395 + }, + { + "epoch": 4.408077075359641, + "grad_norm": 0.425859659910202, + "learning_rate": 8.421498571182517e-08, + "loss": 0.0004, + "num_input_tokens_seen": 16442704, + "step": 33400 + }, + { + "epoch": 4.40873696713739, + "grad_norm": 0.0017851804150268435, + "learning_rate": 8.403003484964743e-08, + "loss": 0.0, + "num_input_tokens_seen": 16445008, + "step": 33405 + }, + { + "epoch": 4.409396858915138, + "grad_norm": 2.694344766496215e-05, + "learning_rate": 8.384527839546196e-08, + "loss": 0.0, + "num_input_tokens_seen": 16447248, + "step": 33410 + }, + { + "epoch": 4.4100567506928865, + "grad_norm": 0.001166831818409264, + "learning_rate": 8.366071638848183e-08, + "loss": 0.0, + "num_input_tokens_seen": 16450128, + "step": 33415 + }, + { + "epoch": 4.410716642470635, + "grad_norm": 0.00015432581130880862, + "learning_rate": 8.347634886787901e-08, + "loss": 0.028, + "num_input_tokens_seen": 16452752, + "step": 33420 + }, + { + "epoch": 4.411376534248383, + "grad_norm": 6.231493171071634e-05, + "learning_rate": 8.329217587278437e-08, + "loss": 0.0, + "num_input_tokens_seen": 16455248, + "step": 33425 + }, + { + "epoch": 4.412036426026131, + "grad_norm": 0.00045959983253851533, + "learning_rate": 8.310819744228691e-08, + "loss": 0.0, + "num_input_tokens_seen": 16457616, + "step": 33430 + }, + { + "epoch": 4.4126963178038805, + "grad_norm": 0.001372020342387259, + "learning_rate": 8.29244136154349e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16459984, + "step": 33435 + }, + { + "epoch": 4.413356209581629, + "grad_norm": 2.6552370400168e-05, + "learning_rate": 8.274082443123543e-08, + "loss": 0.0, + "num_input_tokens_seen": 16462480, + "step": 33440 + }, + { + "epoch": 4.414016101359377, + "grad_norm": 0.0005929334438405931, + "learning_rate": 8.255742992865356e-08, + "loss": 0.0308, + "num_input_tokens_seen": 16465040, + "step": 33445 + }, + { + "epoch": 4.414675993137125, + "grad_norm": 0.00015467203047592193, + "learning_rate": 8.237423014661348e-08, + "loss": 0.0, + "num_input_tokens_seen": 16467728, + "step": 33450 + }, + { + "epoch": 4.415335884914874, + "grad_norm": 0.0003078359295614064, + "learning_rate": 8.219122512399813e-08, + "loss": 0.0, + "num_input_tokens_seen": 16469968, + "step": 33455 + }, + { + "epoch": 4.415995776692623, + "grad_norm": 2.4373392079724e-05, + "learning_rate": 8.200841489964927e-08, + "loss": 0.0002, + "num_input_tokens_seen": 16472592, + "step": 33460 + }, + { + "epoch": 4.416655668470371, + "grad_norm": 0.0010042509529739618, + "learning_rate": 8.182579951236657e-08, + "loss": 0.0, + "num_input_tokens_seen": 16475024, + "step": 33465 + }, + { + "epoch": 4.417315560248119, + "grad_norm": 7.21759715816006e-05, + "learning_rate": 8.164337900090901e-08, + "loss": 0.0, + "num_input_tokens_seen": 16477520, + "step": 33470 + }, + { + "epoch": 4.417975452025868, + "grad_norm": 7.079127681208774e-05, + "learning_rate": 8.146115340399418e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16480016, + "step": 33475 + }, + { + "epoch": 4.418635343803616, + "grad_norm": 1.6037702152971178e-05, + "learning_rate": 8.127912276029781e-08, + "loss": 0.0, + "num_input_tokens_seen": 16482256, + "step": 33480 + }, + { + "epoch": 4.419295235581365, + "grad_norm": 5.478865568875335e-05, + "learning_rate": 8.109728710845488e-08, + "loss": 0.0, + "num_input_tokens_seen": 16484496, + "step": 33485 + }, + { + "epoch": 4.419955127359113, + "grad_norm": 2.6513811462791637e-05, + "learning_rate": 8.091564648705874e-08, + "loss": 0.0, + "num_input_tokens_seen": 16486864, + "step": 33490 + }, + { + "epoch": 4.420615019136862, + "grad_norm": 0.00030529368086718023, + "learning_rate": 8.073420093466087e-08, + "loss": 0.0006, + "num_input_tokens_seen": 16489168, + "step": 33495 + }, + { + "epoch": 4.42127491091461, + "grad_norm": 0.0018798833480104804, + "learning_rate": 8.055295048977218e-08, + "loss": 0.0426, + "num_input_tokens_seen": 16491792, + "step": 33500 + }, + { + "epoch": 4.421934802692358, + "grad_norm": 0.00010593536717351526, + "learning_rate": 8.037189519086163e-08, + "loss": 0.0472, + "num_input_tokens_seen": 16494096, + "step": 33505 + }, + { + "epoch": 4.4225946944701064, + "grad_norm": 0.00031165831023827195, + "learning_rate": 8.019103507635704e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16496720, + "step": 33510 + }, + { + "epoch": 4.423254586247856, + "grad_norm": 0.00030534231336787343, + "learning_rate": 8.00103701846443e-08, + "loss": 0.0, + "num_input_tokens_seen": 16499152, + "step": 33515 + }, + { + "epoch": 4.423914478025604, + "grad_norm": 6.27004337310791, + "learning_rate": 7.982990055406846e-08, + "loss": 0.0025, + "num_input_tokens_seen": 16501520, + "step": 33520 + }, + { + "epoch": 4.424574369803352, + "grad_norm": 0.00011541576532181352, + "learning_rate": 7.964962622293314e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16503824, + "step": 33525 + }, + { + "epoch": 4.4252342615811004, + "grad_norm": 2.0695533748948947e-05, + "learning_rate": 7.946954722949972e-08, + "loss": 0.0, + "num_input_tokens_seen": 16506512, + "step": 33530 + }, + { + "epoch": 4.425894153358849, + "grad_norm": 2.451527507218998e-05, + "learning_rate": 7.928966361198897e-08, + "loss": 0.0, + "num_input_tokens_seen": 16508880, + "step": 33535 + }, + { + "epoch": 4.426554045136598, + "grad_norm": 1.954801700776443e-05, + "learning_rate": 7.910997540858011e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16511120, + "step": 33540 + }, + { + "epoch": 4.427213936914346, + "grad_norm": 0.00020451426098588854, + "learning_rate": 7.89304826574102e-08, + "loss": 0.0, + "num_input_tokens_seen": 16513424, + "step": 33545 + }, + { + "epoch": 4.4278738286920944, + "grad_norm": 0.003469746559858322, + "learning_rate": 7.875118539657566e-08, + "loss": 0.0, + "num_input_tokens_seen": 16515664, + "step": 33550 + }, + { + "epoch": 4.428533720469843, + "grad_norm": 0.00537085859104991, + "learning_rate": 7.857208366413048e-08, + "loss": 0.0, + "num_input_tokens_seen": 16518224, + "step": 33555 + }, + { + "epoch": 4.429193612247591, + "grad_norm": 4.522494418779388e-05, + "learning_rate": 7.839317749808838e-08, + "loss": 0.0006, + "num_input_tokens_seen": 16520528, + "step": 33560 + }, + { + "epoch": 4.42985350402534, + "grad_norm": 7.53160347812809e-05, + "learning_rate": 7.821446693642064e-08, + "loss": 0.0, + "num_input_tokens_seen": 16522896, + "step": 33565 + }, + { + "epoch": 4.4305133958030885, + "grad_norm": 0.0006427129846997559, + "learning_rate": 7.803595201705692e-08, + "loss": 0.0, + "num_input_tokens_seen": 16525392, + "step": 33570 + }, + { + "epoch": 4.431173287580837, + "grad_norm": 0.0001272416702704504, + "learning_rate": 7.785763277788648e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16527952, + "step": 33575 + }, + { + "epoch": 4.431833179358585, + "grad_norm": 0.02497689425945282, + "learning_rate": 7.767950925675559e-08, + "loss": 0.0, + "num_input_tokens_seen": 16530384, + "step": 33580 + }, + { + "epoch": 4.432493071136333, + "grad_norm": 0.0012441120343282819, + "learning_rate": 7.750158149147012e-08, + "loss": 0.0, + "num_input_tokens_seen": 16532752, + "step": 33585 + }, + { + "epoch": 4.4331529629140825, + "grad_norm": 0.00025881710462272167, + "learning_rate": 7.732384951979354e-08, + "loss": 0.0, + "num_input_tokens_seen": 16535248, + "step": 33590 + }, + { + "epoch": 4.433812854691831, + "grad_norm": 0.03984666243195534, + "learning_rate": 7.714631337944854e-08, + "loss": 0.0, + "num_input_tokens_seen": 16537680, + "step": 33595 + }, + { + "epoch": 4.434472746469579, + "grad_norm": 0.004693345166742802, + "learning_rate": 7.696897310811579e-08, + "loss": 0.0, + "num_input_tokens_seen": 16540304, + "step": 33600 + }, + { + "epoch": 4.435132638247327, + "grad_norm": 0.13585327565670013, + "learning_rate": 7.679182874343437e-08, + "loss": 0.092, + "num_input_tokens_seen": 16542992, + "step": 33605 + }, + { + "epoch": 4.435792530025076, + "grad_norm": 0.0002721291675698012, + "learning_rate": 7.66148803230019e-08, + "loss": 0.0072, + "num_input_tokens_seen": 16545616, + "step": 33610 + }, + { + "epoch": 4.436452421802825, + "grad_norm": 0.9574349522590637, + "learning_rate": 7.643812788437454e-08, + "loss": 0.0002, + "num_input_tokens_seen": 16548048, + "step": 33615 + }, + { + "epoch": 4.437112313580573, + "grad_norm": 0.00035204915911890566, + "learning_rate": 7.626157146506651e-08, + "loss": 0.0, + "num_input_tokens_seen": 16550288, + "step": 33620 + }, + { + "epoch": 4.437772205358321, + "grad_norm": 0.0347675122320652, + "learning_rate": 7.608521110255084e-08, + "loss": 0.008, + "num_input_tokens_seen": 16552720, + "step": 33625 + }, + { + "epoch": 4.43843209713607, + "grad_norm": 6.068991933716461e-05, + "learning_rate": 7.590904683425858e-08, + "loss": 0.0, + "num_input_tokens_seen": 16555024, + "step": 33630 + }, + { + "epoch": 4.439091988913818, + "grad_norm": 0.004437305498868227, + "learning_rate": 7.57330786975795e-08, + "loss": 0.0, + "num_input_tokens_seen": 16557520, + "step": 33635 + }, + { + "epoch": 4.439751880691567, + "grad_norm": 5.0391710828989744e-05, + "learning_rate": 7.555730672986138e-08, + "loss": 0.0, + "num_input_tokens_seen": 16559824, + "step": 33640 + }, + { + "epoch": 4.440411772469315, + "grad_norm": 1.7985103113460355e-05, + "learning_rate": 7.53817309684106e-08, + "loss": 0.0, + "num_input_tokens_seen": 16562256, + "step": 33645 + }, + { + "epoch": 4.441071664247064, + "grad_norm": 0.0015585446963086724, + "learning_rate": 7.520635145049193e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16564688, + "step": 33650 + }, + { + "epoch": 4.441731556024812, + "grad_norm": 0.0017445468110963702, + "learning_rate": 7.503116821332834e-08, + "loss": 0.028, + "num_input_tokens_seen": 16566928, + "step": 33655 + }, + { + "epoch": 4.44239144780256, + "grad_norm": 0.010277300141751766, + "learning_rate": 7.485618129410109e-08, + "loss": 0.0, + "num_input_tokens_seen": 16569296, + "step": 33660 + }, + { + "epoch": 4.443051339580309, + "grad_norm": 2.531162681407295e-05, + "learning_rate": 7.468139072994994e-08, + "loss": 0.0, + "num_input_tokens_seen": 16571728, + "step": 33665 + }, + { + "epoch": 4.443711231358058, + "grad_norm": 1.1785974502563477, + "learning_rate": 7.450679655797321e-08, + "loss": 0.0015, + "num_input_tokens_seen": 16574160, + "step": 33670 + }, + { + "epoch": 4.444371123135806, + "grad_norm": 1.7777702808380127, + "learning_rate": 7.433239881522691e-08, + "loss": 0.0018, + "num_input_tokens_seen": 16576336, + "step": 33675 + }, + { + "epoch": 4.445031014913554, + "grad_norm": 0.07361600548028946, + "learning_rate": 7.415819753872576e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16578768, + "step": 33680 + }, + { + "epoch": 4.445690906691302, + "grad_norm": 0.00013445514196064323, + "learning_rate": 7.398419276544287e-08, + "loss": 0.0, + "num_input_tokens_seen": 16581136, + "step": 33685 + }, + { + "epoch": 4.446350798469051, + "grad_norm": 0.0029635755345225334, + "learning_rate": 7.381038453230925e-08, + "loss": 0.0049, + "num_input_tokens_seen": 16583568, + "step": 33690 + }, + { + "epoch": 4.4470106902468, + "grad_norm": 9.411584854125977, + "learning_rate": 7.363677287621462e-08, + "loss": 0.028, + "num_input_tokens_seen": 16586000, + "step": 33695 + }, + { + "epoch": 4.447670582024548, + "grad_norm": 0.00021924672182649374, + "learning_rate": 7.346335783400693e-08, + "loss": 0.0, + "num_input_tokens_seen": 16588368, + "step": 33700 + }, + { + "epoch": 4.448330473802296, + "grad_norm": 0.0011029281886294484, + "learning_rate": 7.329013944249186e-08, + "loss": 0.0, + "num_input_tokens_seen": 16590736, + "step": 33705 + }, + { + "epoch": 4.448990365580045, + "grad_norm": 4.7456309403060004e-05, + "learning_rate": 7.311711773843399e-08, + "loss": 0.0, + "num_input_tokens_seen": 16593168, + "step": 33710 + }, + { + "epoch": 4.449650257357793, + "grad_norm": 0.003539201570674777, + "learning_rate": 7.294429275855596e-08, + "loss": 0.0294, + "num_input_tokens_seen": 16595472, + "step": 33715 + }, + { + "epoch": 4.450310149135542, + "grad_norm": 4.4052645534975454e-05, + "learning_rate": 7.277166453953865e-08, + "loss": 0.0, + "num_input_tokens_seen": 16597584, + "step": 33720 + }, + { + "epoch": 4.45097004091329, + "grad_norm": 6.079300874262117e-05, + "learning_rate": 7.259923311802119e-08, + "loss": 0.0005, + "num_input_tokens_seen": 16600080, + "step": 33725 + }, + { + "epoch": 4.451629932691039, + "grad_norm": 9.078793482331093e-06, + "learning_rate": 7.242699853060041e-08, + "loss": 0.0, + "num_input_tokens_seen": 16602576, + "step": 33730 + }, + { + "epoch": 4.452289824468787, + "grad_norm": 0.0034223340917378664, + "learning_rate": 7.225496081383264e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16605200, + "step": 33735 + }, + { + "epoch": 4.452949716246535, + "grad_norm": 2.6993599021807313e-05, + "learning_rate": 7.2083120004231e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16607568, + "step": 33740 + }, + { + "epoch": 4.453609608024284, + "grad_norm": 4.951494702254422e-05, + "learning_rate": 7.191147613826787e-08, + "loss": 0.0, + "num_input_tokens_seen": 16610448, + "step": 33745 + }, + { + "epoch": 4.454269499802033, + "grad_norm": 3.3548680221429095e-05, + "learning_rate": 7.17400292523731e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16613136, + "step": 33750 + }, + { + "epoch": 4.454929391579781, + "grad_norm": 5.778231570729986e-05, + "learning_rate": 7.156877938293515e-08, + "loss": 0.0, + "num_input_tokens_seen": 16615632, + "step": 33755 + }, + { + "epoch": 4.455589283357529, + "grad_norm": 5.531049828277901e-05, + "learning_rate": 7.139772656630083e-08, + "loss": 0.0, + "num_input_tokens_seen": 16618192, + "step": 33760 + }, + { + "epoch": 4.4562491751352775, + "grad_norm": 0.0002507556928321719, + "learning_rate": 7.122687083877422e-08, + "loss": 0.0, + "num_input_tokens_seen": 16620496, + "step": 33765 + }, + { + "epoch": 4.456909066913026, + "grad_norm": 1.9328092093928717e-05, + "learning_rate": 7.105621223661906e-08, + "loss": 0.0381, + "num_input_tokens_seen": 16622864, + "step": 33770 + }, + { + "epoch": 4.457568958690775, + "grad_norm": 0.000141787197208032, + "learning_rate": 7.088575079605585e-08, + "loss": 0.0, + "num_input_tokens_seen": 16625360, + "step": 33775 + }, + { + "epoch": 4.458228850468523, + "grad_norm": 1.3037359167356044e-05, + "learning_rate": 7.071548655326387e-08, + "loss": 0.0, + "num_input_tokens_seen": 16627856, + "step": 33780 + }, + { + "epoch": 4.4588887422462715, + "grad_norm": 0.00021317604114301503, + "learning_rate": 7.054541954438053e-08, + "loss": 0.0, + "num_input_tokens_seen": 16630544, + "step": 33785 + }, + { + "epoch": 4.45954863402402, + "grad_norm": 0.0002955278323497623, + "learning_rate": 7.03755498055012e-08, + "loss": 0.0, + "num_input_tokens_seen": 16633104, + "step": 33790 + }, + { + "epoch": 4.460208525801768, + "grad_norm": 1.046508550643921, + "learning_rate": 7.02058773726798e-08, + "loss": 0.0386, + "num_input_tokens_seen": 16635728, + "step": 33795 + }, + { + "epoch": 4.460868417579517, + "grad_norm": 0.00010222404671367258, + "learning_rate": 7.003640228192775e-08, + "loss": 0.0, + "num_input_tokens_seen": 16637904, + "step": 33800 + }, + { + "epoch": 4.4615283093572655, + "grad_norm": 0.0018340600654482841, + "learning_rate": 6.986712456921506e-08, + "loss": 0.0, + "num_input_tokens_seen": 16640208, + "step": 33805 + }, + { + "epoch": 4.462188201135014, + "grad_norm": 0.0010408456437289715, + "learning_rate": 6.969804427046988e-08, + "loss": 0.0, + "num_input_tokens_seen": 16642640, + "step": 33810 + }, + { + "epoch": 4.462848092912762, + "grad_norm": 4.860827175434679e-05, + "learning_rate": 6.952916142157783e-08, + "loss": 0.0239, + "num_input_tokens_seen": 16645136, + "step": 33815 + }, + { + "epoch": 4.46350798469051, + "grad_norm": 3.999754699179903e-05, + "learning_rate": 6.936047605838347e-08, + "loss": 0.0, + "num_input_tokens_seen": 16647376, + "step": 33820 + }, + { + "epoch": 4.4641678764682595, + "grad_norm": 4.946894841850735e-05, + "learning_rate": 6.919198821668892e-08, + "loss": 0.0, + "num_input_tokens_seen": 16649616, + "step": 33825 + }, + { + "epoch": 4.464827768246008, + "grad_norm": 5.7902558182831854e-05, + "learning_rate": 6.902369793225437e-08, + "loss": 0.0, + "num_input_tokens_seen": 16652048, + "step": 33830 + }, + { + "epoch": 4.465487660023756, + "grad_norm": 0.0020352238789200783, + "learning_rate": 6.885560524079837e-08, + "loss": 0.0, + "num_input_tokens_seen": 16654544, + "step": 33835 + }, + { + "epoch": 4.466147551801504, + "grad_norm": 0.0005612990353256464, + "learning_rate": 6.868771017799735e-08, + "loss": 0.001, + "num_input_tokens_seen": 16657104, + "step": 33840 + }, + { + "epoch": 4.466807443579253, + "grad_norm": 12.53954792022705, + "learning_rate": 6.852001277948593e-08, + "loss": 0.0366, + "num_input_tokens_seen": 16659600, + "step": 33845 + }, + { + "epoch": 4.467467335357002, + "grad_norm": 0.00031025375938043, + "learning_rate": 6.835251308085644e-08, + "loss": 0.0, + "num_input_tokens_seen": 16662352, + "step": 33850 + }, + { + "epoch": 4.46812722713475, + "grad_norm": 2.4980216039693914e-05, + "learning_rate": 6.818521111765952e-08, + "loss": 0.0, + "num_input_tokens_seen": 16664592, + "step": 33855 + }, + { + "epoch": 4.468787118912498, + "grad_norm": 1.1097313290520106e-05, + "learning_rate": 6.801810692540411e-08, + "loss": 0.0, + "num_input_tokens_seen": 16667216, + "step": 33860 + }, + { + "epoch": 4.469447010690247, + "grad_norm": 0.00029235193505883217, + "learning_rate": 6.78512005395564e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16669776, + "step": 33865 + }, + { + "epoch": 4.470106902467995, + "grad_norm": 5.69798139622435e-05, + "learning_rate": 6.768449199554127e-08, + "loss": 0.0, + "num_input_tokens_seen": 16672208, + "step": 33870 + }, + { + "epoch": 4.470766794245744, + "grad_norm": 7.299717253772542e-05, + "learning_rate": 6.751798132874154e-08, + "loss": 0.0, + "num_input_tokens_seen": 16674512, + "step": 33875 + }, + { + "epoch": 4.471426686023492, + "grad_norm": 0.32424068450927734, + "learning_rate": 6.73516685744977e-08, + "loss": 0.0002, + "num_input_tokens_seen": 16676816, + "step": 33880 + }, + { + "epoch": 4.472086577801241, + "grad_norm": 5.323095683706924e-05, + "learning_rate": 6.718555376810864e-08, + "loss": 0.0192, + "num_input_tokens_seen": 16679376, + "step": 33885 + }, + { + "epoch": 4.472746469578989, + "grad_norm": 0.0003650693688541651, + "learning_rate": 6.70196369448306e-08, + "loss": 0.0, + "num_input_tokens_seen": 16681808, + "step": 33890 + }, + { + "epoch": 4.473406361356737, + "grad_norm": 8.204561163438484e-05, + "learning_rate": 6.685391813987873e-08, + "loss": 0.0, + "num_input_tokens_seen": 16684560, + "step": 33895 + }, + { + "epoch": 4.474066253134486, + "grad_norm": 0.00024618374300189316, + "learning_rate": 6.668839738842547e-08, + "loss": 0.0239, + "num_input_tokens_seen": 16687056, + "step": 33900 + }, + { + "epoch": 4.474726144912235, + "grad_norm": 80.98246002197266, + "learning_rate": 6.652307472560103e-08, + "loss": 0.0666, + "num_input_tokens_seen": 16689424, + "step": 33905 + }, + { + "epoch": 4.475386036689983, + "grad_norm": 3.296041177236475e-05, + "learning_rate": 6.635795018649459e-08, + "loss": 0.0, + "num_input_tokens_seen": 16691856, + "step": 33910 + }, + { + "epoch": 4.476045928467731, + "grad_norm": 9.287385940551758, + "learning_rate": 6.61930238061521e-08, + "loss": 0.0266, + "num_input_tokens_seen": 16694288, + "step": 33915 + }, + { + "epoch": 4.4767058202454795, + "grad_norm": 11.796046257019043, + "learning_rate": 6.602829561957846e-08, + "loss": 0.0395, + "num_input_tokens_seen": 16696976, + "step": 33920 + }, + { + "epoch": 4.477365712023229, + "grad_norm": 0.00013667892199009657, + "learning_rate": 6.586376566173556e-08, + "loss": 0.0, + "num_input_tokens_seen": 16699536, + "step": 33925 + }, + { + "epoch": 4.478025603800977, + "grad_norm": 3.166230089846067e-05, + "learning_rate": 6.569943396754396e-08, + "loss": 0.0009, + "num_input_tokens_seen": 16701904, + "step": 33930 + }, + { + "epoch": 4.478685495578725, + "grad_norm": 6.734608177794144e-05, + "learning_rate": 6.553530057188206e-08, + "loss": 0.0, + "num_input_tokens_seen": 16704272, + "step": 33935 + }, + { + "epoch": 4.4793453873564735, + "grad_norm": 8.24275120976381e-05, + "learning_rate": 6.537136550958545e-08, + "loss": 0.0, + "num_input_tokens_seen": 16706896, + "step": 33940 + }, + { + "epoch": 4.480005279134222, + "grad_norm": 2.8194315433502197, + "learning_rate": 6.52076288154485e-08, + "loss": 0.0016, + "num_input_tokens_seen": 16709008, + "step": 33945 + }, + { + "epoch": 4.48066517091197, + "grad_norm": 8.94250202178955, + "learning_rate": 6.504409052422332e-08, + "loss": 0.007, + "num_input_tokens_seen": 16711440, + "step": 33950 + }, + { + "epoch": 4.481325062689719, + "grad_norm": 717.7362060546875, + "learning_rate": 6.488075067061927e-08, + "loss": 0.0969, + "num_input_tokens_seen": 16714128, + "step": 33955 + }, + { + "epoch": 4.4819849544674675, + "grad_norm": 0.0002321966312592849, + "learning_rate": 6.471760928930436e-08, + "loss": 0.0, + "num_input_tokens_seen": 16716560, + "step": 33960 + }, + { + "epoch": 4.482644846245216, + "grad_norm": 0.0001448716939194128, + "learning_rate": 6.455466641490403e-08, + "loss": 0.0, + "num_input_tokens_seen": 16719120, + "step": 33965 + }, + { + "epoch": 4.483304738022964, + "grad_norm": 0.00013773588580079377, + "learning_rate": 6.439192208200195e-08, + "loss": 0.0, + "num_input_tokens_seen": 16721552, + "step": 33970 + }, + { + "epoch": 4.483964629800712, + "grad_norm": 6.926531932549551e-05, + "learning_rate": 6.422937632513914e-08, + "loss": 0.0, + "num_input_tokens_seen": 16724304, + "step": 33975 + }, + { + "epoch": 4.4846245215784615, + "grad_norm": 0.0003329158644191921, + "learning_rate": 6.40670291788149e-08, + "loss": 0.0, + "num_input_tokens_seen": 16726992, + "step": 33980 + }, + { + "epoch": 4.48528441335621, + "grad_norm": 0.00022437769803218544, + "learning_rate": 6.390488067748634e-08, + "loss": 0.0, + "num_input_tokens_seen": 16729488, + "step": 33985 + }, + { + "epoch": 4.485944305133958, + "grad_norm": 0.12374948710203171, + "learning_rate": 6.374293085556814e-08, + "loss": 0.0, + "num_input_tokens_seen": 16731920, + "step": 33990 + }, + { + "epoch": 4.486604196911706, + "grad_norm": 0.0772160217165947, + "learning_rate": 6.358117974743293e-08, + "loss": 0.0, + "num_input_tokens_seen": 16734416, + "step": 33995 + }, + { + "epoch": 4.487264088689455, + "grad_norm": 0.0010211360640823841, + "learning_rate": 6.341962738741125e-08, + "loss": 0.0, + "num_input_tokens_seen": 16737104, + "step": 34000 + }, + { + "epoch": 4.487923980467204, + "grad_norm": 4.8965968744596466e-05, + "learning_rate": 6.325827380979176e-08, + "loss": 0.0, + "num_input_tokens_seen": 16739536, + "step": 34005 + }, + { + "epoch": 4.488583872244952, + "grad_norm": 0.00012408196926116943, + "learning_rate": 6.309711904882009e-08, + "loss": 0.0, + "num_input_tokens_seen": 16741712, + "step": 34010 + }, + { + "epoch": 4.4892437640227, + "grad_norm": 2.4676581233507022e-05, + "learning_rate": 6.293616313870032e-08, + "loss": 0.0, + "num_input_tokens_seen": 16743824, + "step": 34015 + }, + { + "epoch": 4.489903655800449, + "grad_norm": 8.535667438991368e-05, + "learning_rate": 6.277540611359445e-08, + "loss": 0.0, + "num_input_tokens_seen": 16746256, + "step": 34020 + }, + { + "epoch": 4.490563547578197, + "grad_norm": 0.15944455564022064, + "learning_rate": 6.261484800762163e-08, + "loss": 0.0, + "num_input_tokens_seen": 16748624, + "step": 34025 + }, + { + "epoch": 4.491223439355946, + "grad_norm": 2.235090323665645e-05, + "learning_rate": 6.245448885485938e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16751248, + "step": 34030 + }, + { + "epoch": 4.491883331133694, + "grad_norm": 0.006533946376293898, + "learning_rate": 6.229432868934281e-08, + "loss": 0.0, + "num_input_tokens_seen": 16753680, + "step": 34035 + }, + { + "epoch": 4.492543222911443, + "grad_norm": 9.534538548905402e-05, + "learning_rate": 6.21343675450644e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16756240, + "step": 34040 + }, + { + "epoch": 4.493203114689191, + "grad_norm": 0.0004805122152902186, + "learning_rate": 6.19746054559751e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16758672, + "step": 34045 + }, + { + "epoch": 4.493863006466939, + "grad_norm": 0.258656769990921, + "learning_rate": 6.181504245598312e-08, + "loss": 0.0, + "num_input_tokens_seen": 16760848, + "step": 34050 + }, + { + "epoch": 4.494522898244687, + "grad_norm": 2.5847604774753563e-05, + "learning_rate": 6.165567857895471e-08, + "loss": 0.0, + "num_input_tokens_seen": 16763344, + "step": 34055 + }, + { + "epoch": 4.495182790022437, + "grad_norm": 0.6106637120246887, + "learning_rate": 6.149651385871358e-08, + "loss": 0.0005, + "num_input_tokens_seen": 16765904, + "step": 34060 + }, + { + "epoch": 4.495842681800185, + "grad_norm": 0.002589078852906823, + "learning_rate": 6.133754832904092e-08, + "loss": 0.0, + "num_input_tokens_seen": 16768336, + "step": 34065 + }, + { + "epoch": 4.496502573577933, + "grad_norm": 6.54537943773903e-05, + "learning_rate": 6.117878202367677e-08, + "loss": 0.0213, + "num_input_tokens_seen": 16770832, + "step": 34070 + }, + { + "epoch": 4.497162465355681, + "grad_norm": 0.14802023768424988, + "learning_rate": 6.102021497631749e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16773264, + "step": 34075 + }, + { + "epoch": 4.49782235713343, + "grad_norm": 1.6509698980371468e-05, + "learning_rate": 6.086184722061826e-08, + "loss": 0.0103, + "num_input_tokens_seen": 16775824, + "step": 34080 + }, + { + "epoch": 4.498482248911179, + "grad_norm": 0.0005468002054840326, + "learning_rate": 6.070367879019101e-08, + "loss": 0.0014, + "num_input_tokens_seen": 16778064, + "step": 34085 + }, + { + "epoch": 4.499142140688927, + "grad_norm": 1.2052417332597543e-05, + "learning_rate": 6.054570971860618e-08, + "loss": 0.0007, + "num_input_tokens_seen": 16780624, + "step": 34090 + }, + { + "epoch": 4.499802032466675, + "grad_norm": 1.0952991247177124, + "learning_rate": 6.038794003939151e-08, + "loss": 0.0242, + "num_input_tokens_seen": 16783248, + "step": 34095 + }, + { + "epoch": 4.500461924244424, + "grad_norm": 1.677956424828153e-05, + "learning_rate": 6.023036978603213e-08, + "loss": 0.0016, + "num_input_tokens_seen": 16785552, + "step": 34100 + }, + { + "epoch": 4.501121816022172, + "grad_norm": 4.611305848811753e-05, + "learning_rate": 6.007299899197194e-08, + "loss": 0.0, + "num_input_tokens_seen": 16787728, + "step": 34105 + }, + { + "epoch": 4.501781707799921, + "grad_norm": 0.00016956614854279906, + "learning_rate": 5.991582769061121e-08, + "loss": 0.0, + "num_input_tokens_seen": 16790288, + "step": 34110 + }, + { + "epoch": 4.501781707799921, + "eval_loss": 0.2763582170009613, + "eval_runtime": 7.8101, + "eval_samples_per_second": 862.341, + "eval_steps_per_second": 107.809, + "num_input_tokens_seen": 16790288, + "step": 34110 + }, + { + "epoch": 4.502441599577669, + "grad_norm": 0.00017157703405246139, + "learning_rate": 5.975885591530827e-08, + "loss": 0.0, + "num_input_tokens_seen": 16792848, + "step": 34115 + }, + { + "epoch": 4.503101491355418, + "grad_norm": 0.0001700647408142686, + "learning_rate": 5.9602083699379577e-08, + "loss": 0.0518, + "num_input_tokens_seen": 16795408, + "step": 34120 + }, + { + "epoch": 4.503761383133166, + "grad_norm": 2.089840199914761e-05, + "learning_rate": 5.9445511076098745e-08, + "loss": 0.028, + "num_input_tokens_seen": 16798096, + "step": 34125 + }, + { + "epoch": 4.504421274910914, + "grad_norm": 6.222442607395351e-05, + "learning_rate": 5.92891380786974e-08, + "loss": 0.0, + "num_input_tokens_seen": 16800528, + "step": 34130 + }, + { + "epoch": 4.505081166688663, + "grad_norm": 3.297501098131761e-05, + "learning_rate": 5.913296474036422e-08, + "loss": 0.0, + "num_input_tokens_seen": 16803024, + "step": 34135 + }, + { + "epoch": 4.505741058466412, + "grad_norm": 1.1789659765781835e-05, + "learning_rate": 5.8976991094246034e-08, + "loss": 0.0, + "num_input_tokens_seen": 16805456, + "step": 34140 + }, + { + "epoch": 4.50640095024416, + "grad_norm": 0.002153146080672741, + "learning_rate": 5.882121717344735e-08, + "loss": 0.0005, + "num_input_tokens_seen": 16807632, + "step": 34145 + }, + { + "epoch": 4.507060842021908, + "grad_norm": 0.0005576178664341569, + "learning_rate": 5.866564301102972e-08, + "loss": 0.0, + "num_input_tokens_seen": 16810256, + "step": 34150 + }, + { + "epoch": 4.5077207337996565, + "grad_norm": 2.7772233486175537, + "learning_rate": 5.851026864001263e-08, + "loss": 0.0047, + "num_input_tokens_seen": 16813008, + "step": 34155 + }, + { + "epoch": 4.508380625577406, + "grad_norm": 0.06701714545488358, + "learning_rate": 5.835509409337358e-08, + "loss": 0.0294, + "num_input_tokens_seen": 16815376, + "step": 34160 + }, + { + "epoch": 4.509040517355154, + "grad_norm": 8.746223102207296e-06, + "learning_rate": 5.820011940404668e-08, + "loss": 0.0, + "num_input_tokens_seen": 16817680, + "step": 34165 + }, + { + "epoch": 4.509700409132902, + "grad_norm": 0.0006987557862885296, + "learning_rate": 5.804534460492449e-08, + "loss": 0.0, + "num_input_tokens_seen": 16820368, + "step": 34170 + }, + { + "epoch": 4.5103603009106505, + "grad_norm": 1.2826088095607702e-05, + "learning_rate": 5.789076972885687e-08, + "loss": 0.0, + "num_input_tokens_seen": 16822672, + "step": 34175 + }, + { + "epoch": 4.511020192688399, + "grad_norm": 0.0025306891184300184, + "learning_rate": 5.7736394808651226e-08, + "loss": 0.0, + "num_input_tokens_seen": 16824976, + "step": 34180 + }, + { + "epoch": 4.511680084466148, + "grad_norm": 0.0032583356369286776, + "learning_rate": 5.758221987707235e-08, + "loss": 0.0, + "num_input_tokens_seen": 16827472, + "step": 34185 + }, + { + "epoch": 4.512339976243896, + "grad_norm": 6.325223512249067e-05, + "learning_rate": 5.742824496684284e-08, + "loss": 0.0, + "num_input_tokens_seen": 16829840, + "step": 34190 + }, + { + "epoch": 4.5129998680216445, + "grad_norm": 4.075995457242243e-05, + "learning_rate": 5.72744701106429e-08, + "loss": 0.0, + "num_input_tokens_seen": 16832400, + "step": 34195 + }, + { + "epoch": 4.513659759799393, + "grad_norm": 0.00010375280544394627, + "learning_rate": 5.7120895341109864e-08, + "loss": 0.0016, + "num_input_tokens_seen": 16834832, + "step": 34200 + }, + { + "epoch": 4.514319651577141, + "grad_norm": 5.2472357749938965, + "learning_rate": 5.696752069083899e-08, + "loss": 0.0066, + "num_input_tokens_seen": 16837200, + "step": 34205 + }, + { + "epoch": 4.51497954335489, + "grad_norm": 8.740870725887362e-06, + "learning_rate": 5.6814346192383125e-08, + "loss": 0.0, + "num_input_tokens_seen": 16839632, + "step": 34210 + }, + { + "epoch": 4.5156394351326385, + "grad_norm": 6.086594657972455e-05, + "learning_rate": 5.666137187825204e-08, + "loss": 0.0, + "num_input_tokens_seen": 16842128, + "step": 34215 + }, + { + "epoch": 4.516299326910387, + "grad_norm": 0.0001888852275442332, + "learning_rate": 5.650859778091388e-08, + "loss": 0.0002, + "num_input_tokens_seen": 16844240, + "step": 34220 + }, + { + "epoch": 4.516959218688135, + "grad_norm": 1.9013299606740475e-05, + "learning_rate": 5.635602393279326e-08, + "loss": 0.0, + "num_input_tokens_seen": 16846352, + "step": 34225 + }, + { + "epoch": 4.517619110465883, + "grad_norm": 2.5412498871446587e-05, + "learning_rate": 5.62036503662735e-08, + "loss": 0.0, + "num_input_tokens_seen": 16848784, + "step": 34230 + }, + { + "epoch": 4.518279002243632, + "grad_norm": 0.0002402652462478727, + "learning_rate": 5.6051477113694625e-08, + "loss": 0.0, + "num_input_tokens_seen": 16850960, + "step": 34235 + }, + { + "epoch": 4.518938894021381, + "grad_norm": 0.0011904650600627065, + "learning_rate": 5.589950420735379e-08, + "loss": 0.0, + "num_input_tokens_seen": 16853968, + "step": 34240 + }, + { + "epoch": 4.519598785799129, + "grad_norm": 4.7674417146481574e-05, + "learning_rate": 5.574773167950697e-08, + "loss": 0.045, + "num_input_tokens_seen": 16856592, + "step": 34245 + }, + { + "epoch": 4.520258677576877, + "grad_norm": 0.0012722613755613565, + "learning_rate": 5.5596159562366076e-08, + "loss": 0.0, + "num_input_tokens_seen": 16859024, + "step": 34250 + }, + { + "epoch": 4.520918569354626, + "grad_norm": 36.9177131652832, + "learning_rate": 5.5444787888101696e-08, + "loss": 0.0518, + "num_input_tokens_seen": 16861264, + "step": 34255 + }, + { + "epoch": 4.521578461132374, + "grad_norm": 1.2535748282971326e-05, + "learning_rate": 5.529361668884103e-08, + "loss": 0.0002, + "num_input_tokens_seen": 16863696, + "step": 34260 + }, + { + "epoch": 4.522238352910123, + "grad_norm": 1.3539308383769821e-05, + "learning_rate": 5.514264599666918e-08, + "loss": 0.0, + "num_input_tokens_seen": 16866064, + "step": 34265 + }, + { + "epoch": 4.522898244687871, + "grad_norm": 0.005475457292050123, + "learning_rate": 5.4991875843628745e-08, + "loss": 0.0, + "num_input_tokens_seen": 16868688, + "step": 34270 + }, + { + "epoch": 4.52355813646562, + "grad_norm": 0.0007006602245382965, + "learning_rate": 5.484130626171923e-08, + "loss": 0.0005, + "num_input_tokens_seen": 16870800, + "step": 34275 + }, + { + "epoch": 4.524218028243368, + "grad_norm": 0.0001456452300772071, + "learning_rate": 5.46909372828982e-08, + "loss": 0.0, + "num_input_tokens_seen": 16873552, + "step": 34280 + }, + { + "epoch": 4.524877920021116, + "grad_norm": 1.1672700643539429, + "learning_rate": 5.454076893908055e-08, + "loss": 0.0008, + "num_input_tokens_seen": 16875984, + "step": 34285 + }, + { + "epoch": 4.5255378117988645, + "grad_norm": 6.52900489512831e-05, + "learning_rate": 5.439080126213802e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16878544, + "step": 34290 + }, + { + "epoch": 4.526197703576614, + "grad_norm": 0.0034513825085014105, + "learning_rate": 5.4241034283900364e-08, + "loss": 0.0, + "num_input_tokens_seen": 16881168, + "step": 34295 + }, + { + "epoch": 4.526857595354362, + "grad_norm": 1.3490453056874685e-05, + "learning_rate": 5.40914680361545e-08, + "loss": 0.0, + "num_input_tokens_seen": 16883472, + "step": 34300 + }, + { + "epoch": 4.52751748713211, + "grad_norm": 2.3728985979687423e-05, + "learning_rate": 5.394210255064502e-08, + "loss": 0.0, + "num_input_tokens_seen": 16885648, + "step": 34305 + }, + { + "epoch": 4.5281773789098585, + "grad_norm": 0.07513949275016785, + "learning_rate": 5.379293785907335e-08, + "loss": 0.0, + "num_input_tokens_seen": 16887888, + "step": 34310 + }, + { + "epoch": 4.528837270687607, + "grad_norm": 3.131102857878432e-05, + "learning_rate": 5.364397399309861e-08, + "loss": 0.0005, + "num_input_tokens_seen": 16890128, + "step": 34315 + }, + { + "epoch": 4.529497162465356, + "grad_norm": 0.00018491598893888295, + "learning_rate": 5.349521098433762e-08, + "loss": 0.0, + "num_input_tokens_seen": 16892496, + "step": 34320 + }, + { + "epoch": 4.530157054243104, + "grad_norm": 0.007551413960754871, + "learning_rate": 5.334664886436391e-08, + "loss": 0.0415, + "num_input_tokens_seen": 16894608, + "step": 34325 + }, + { + "epoch": 4.5308169460208525, + "grad_norm": 0.00030818648519925773, + "learning_rate": 5.3198287664708907e-08, + "loss": 0.0, + "num_input_tokens_seen": 16897616, + "step": 34330 + }, + { + "epoch": 4.531476837798601, + "grad_norm": 2.6269792215316556e-05, + "learning_rate": 5.3050127416861104e-08, + "loss": 0.0, + "num_input_tokens_seen": 16900048, + "step": 34335 + }, + { + "epoch": 4.532136729576349, + "grad_norm": 5.578194395639002e-05, + "learning_rate": 5.290216815226656e-08, + "loss": 0.0165, + "num_input_tokens_seen": 16902416, + "step": 34340 + }, + { + "epoch": 4.532796621354098, + "grad_norm": 0.0002465557190589607, + "learning_rate": 5.275440990232838e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16904656, + "step": 34345 + }, + { + "epoch": 4.5334565131318465, + "grad_norm": 0.00656506372615695, + "learning_rate": 5.2606852698407367e-08, + "loss": 0.0, + "num_input_tokens_seen": 16907216, + "step": 34350 + }, + { + "epoch": 4.534116404909595, + "grad_norm": 14.822111129760742, + "learning_rate": 5.245949657182136e-08, + "loss": 0.0381, + "num_input_tokens_seen": 16909840, + "step": 34355 + }, + { + "epoch": 4.534776296687343, + "grad_norm": 2.3827880795579404e-05, + "learning_rate": 5.231234155384567e-08, + "loss": 0.0003, + "num_input_tokens_seen": 16912464, + "step": 34360 + }, + { + "epoch": 4.535436188465091, + "grad_norm": 1.264294496650109e-05, + "learning_rate": 5.216538767571277e-08, + "loss": 0.0, + "num_input_tokens_seen": 16915088, + "step": 34365 + }, + { + "epoch": 4.5360960802428405, + "grad_norm": 3.063208350795321e-05, + "learning_rate": 5.201863496861292e-08, + "loss": 0.002, + "num_input_tokens_seen": 16917584, + "step": 34370 + }, + { + "epoch": 4.536755972020589, + "grad_norm": 3.200107312295586e-05, + "learning_rate": 5.187208346369276e-08, + "loss": 0.0, + "num_input_tokens_seen": 16920080, + "step": 34375 + }, + { + "epoch": 4.537415863798337, + "grad_norm": 0.06736087054014206, + "learning_rate": 5.17257331920572e-08, + "loss": 0.0, + "num_input_tokens_seen": 16922640, + "step": 34380 + }, + { + "epoch": 4.538075755576085, + "grad_norm": 0.020671632140874863, + "learning_rate": 5.157958418476793e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16925200, + "step": 34385 + }, + { + "epoch": 4.538735647353834, + "grad_norm": 0.00017729074170347303, + "learning_rate": 5.1433636472844045e-08, + "loss": 0.0123, + "num_input_tokens_seen": 16927504, + "step": 34390 + }, + { + "epoch": 4.539395539131583, + "grad_norm": 0.0018899147398769855, + "learning_rate": 5.1287890087261864e-08, + "loss": 0.0079, + "num_input_tokens_seen": 16929872, + "step": 34395 + }, + { + "epoch": 4.540055430909331, + "grad_norm": 0.3784496784210205, + "learning_rate": 5.114234505895465e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16931856, + "step": 34400 + }, + { + "epoch": 4.540715322687079, + "grad_norm": 0.00019514707673806697, + "learning_rate": 5.0997001418814025e-08, + "loss": 0.0, + "num_input_tokens_seen": 16934224, + "step": 34405 + }, + { + "epoch": 4.541375214464828, + "grad_norm": 0.0017008042195811868, + "learning_rate": 5.085185919768742e-08, + "loss": 0.0, + "num_input_tokens_seen": 16936592, + "step": 34410 + }, + { + "epoch": 4.542035106242576, + "grad_norm": 0.00016672021592967212, + "learning_rate": 5.0706918426380754e-08, + "loss": 0.0, + "num_input_tokens_seen": 16939024, + "step": 34415 + }, + { + "epoch": 4.542694998020325, + "grad_norm": 0.00020502068218775094, + "learning_rate": 5.056217913565619e-08, + "loss": 0.0366, + "num_input_tokens_seen": 16941456, + "step": 34420 + }, + { + "epoch": 4.543354889798073, + "grad_norm": 3.393287261133082e-05, + "learning_rate": 5.0417641356233943e-08, + "loss": 0.0, + "num_input_tokens_seen": 16943632, + "step": 34425 + }, + { + "epoch": 4.544014781575822, + "grad_norm": 2.4823410058161244e-05, + "learning_rate": 5.027330511879102e-08, + "loss": 0.0, + "num_input_tokens_seen": 16946000, + "step": 34430 + }, + { + "epoch": 4.54467467335357, + "grad_norm": 0.00012084999616490677, + "learning_rate": 5.012917045396148e-08, + "loss": 0.0, + "num_input_tokens_seen": 16948560, + "step": 34435 + }, + { + "epoch": 4.545334565131318, + "grad_norm": 0.0009474708931520581, + "learning_rate": 4.998523739233729e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16950928, + "step": 34440 + }, + { + "epoch": 4.545994456909067, + "grad_norm": 2.3217171474243514e-05, + "learning_rate": 4.984150596446701e-08, + "loss": 0.0005, + "num_input_tokens_seen": 16953360, + "step": 34445 + }, + { + "epoch": 4.546654348686816, + "grad_norm": 9.132823470281437e-05, + "learning_rate": 4.9697976200856584e-08, + "loss": 0.0, + "num_input_tokens_seen": 16955856, + "step": 34450 + }, + { + "epoch": 4.547314240464564, + "grad_norm": 3.412169826333411e-05, + "learning_rate": 4.955464813196897e-08, + "loss": 0.0, + "num_input_tokens_seen": 16958160, + "step": 34455 + }, + { + "epoch": 4.547974132242312, + "grad_norm": 5.927090023760684e-05, + "learning_rate": 4.941152178822483e-08, + "loss": 0.0001, + "num_input_tokens_seen": 16960592, + "step": 34460 + }, + { + "epoch": 4.5486340240200605, + "grad_norm": 1.7155516616185196e-05, + "learning_rate": 4.926859720000165e-08, + "loss": 0.0, + "num_input_tokens_seen": 16963152, + "step": 34465 + }, + { + "epoch": 4.54929391579781, + "grad_norm": 8.997808618005365e-05, + "learning_rate": 4.912587439763394e-08, + "loss": 0.0, + "num_input_tokens_seen": 16965584, + "step": 34470 + }, + { + "epoch": 4.549953807575558, + "grad_norm": 0.00021670824207831174, + "learning_rate": 4.898335341141369e-08, + "loss": 0.0, + "num_input_tokens_seen": 16967888, + "step": 34475 + }, + { + "epoch": 4.550613699353306, + "grad_norm": 0.0006072040996514261, + "learning_rate": 4.884103427159014e-08, + "loss": 0.0, + "num_input_tokens_seen": 16970256, + "step": 34480 + }, + { + "epoch": 4.5512735911310545, + "grad_norm": 0.0130154425278306, + "learning_rate": 4.8698917008369144e-08, + "loss": 0.0615, + "num_input_tokens_seen": 16973200, + "step": 34485 + }, + { + "epoch": 4.551933482908803, + "grad_norm": 5.232800685917027e-05, + "learning_rate": 4.855700165191423e-08, + "loss": 0.0, + "num_input_tokens_seen": 16975568, + "step": 34490 + }, + { + "epoch": 4.552593374686552, + "grad_norm": 0.0001216641758219339, + "learning_rate": 4.841528823234609e-08, + "loss": 0.0, + "num_input_tokens_seen": 16978128, + "step": 34495 + }, + { + "epoch": 4.5532532664643, + "grad_norm": 0.009573639370501041, + "learning_rate": 4.8273776779741984e-08, + "loss": 0.0, + "num_input_tokens_seen": 16980560, + "step": 34500 + }, + { + "epoch": 4.5539131582420485, + "grad_norm": 9.984229109250009e-05, + "learning_rate": 4.8132467324136894e-08, + "loss": 0.0, + "num_input_tokens_seen": 16983184, + "step": 34505 + }, + { + "epoch": 4.554573050019797, + "grad_norm": 0.004783345386385918, + "learning_rate": 4.799135989552272e-08, + "loss": 0.0, + "num_input_tokens_seen": 16985488, + "step": 34510 + }, + { + "epoch": 4.555232941797545, + "grad_norm": 0.0024777057114988565, + "learning_rate": 4.7850454523848725e-08, + "loss": 0.0019, + "num_input_tokens_seen": 16987984, + "step": 34515 + }, + { + "epoch": 4.555892833575293, + "grad_norm": 0.002937519922852516, + "learning_rate": 4.770975123902066e-08, + "loss": 0.0, + "num_input_tokens_seen": 16990288, + "step": 34520 + }, + { + "epoch": 4.5565527253530425, + "grad_norm": 4.716146213468164e-05, + "learning_rate": 4.756925007090185e-08, + "loss": 0.0, + "num_input_tokens_seen": 16992912, + "step": 34525 + }, + { + "epoch": 4.557212617130791, + "grad_norm": 19.31949234008789, + "learning_rate": 4.7428951049312996e-08, + "loss": 0.0294, + "num_input_tokens_seen": 16995088, + "step": 34530 + }, + { + "epoch": 4.557872508908539, + "grad_norm": 4.645546869141981e-05, + "learning_rate": 4.728885420403117e-08, + "loss": 0.0, + "num_input_tokens_seen": 16997520, + "step": 34535 + }, + { + "epoch": 4.558532400686287, + "grad_norm": 0.001822422374971211, + "learning_rate": 4.714895956479104e-08, + "loss": 0.0, + "num_input_tokens_seen": 16999696, + "step": 34540 + }, + { + "epoch": 4.559192292464036, + "grad_norm": 0.007913710549473763, + "learning_rate": 4.700926716128428e-08, + "loss": 0.0337, + "num_input_tokens_seen": 17002256, + "step": 34545 + }, + { + "epoch": 4.559852184241785, + "grad_norm": 2.7463400328997523e-05, + "learning_rate": 4.686977702315953e-08, + "loss": 0.0, + "num_input_tokens_seen": 17004880, + "step": 34550 + }, + { + "epoch": 4.560512076019533, + "grad_norm": 1.1041237485187594e-05, + "learning_rate": 4.673048918002265e-08, + "loss": 0.0719, + "num_input_tokens_seen": 17007376, + "step": 34555 + }, + { + "epoch": 4.561171967797281, + "grad_norm": 1.2896975022158585e-05, + "learning_rate": 4.659140366143621e-08, + "loss": 0.0, + "num_input_tokens_seen": 17009808, + "step": 34560 + }, + { + "epoch": 4.56183185957503, + "grad_norm": 0.0025346383918076754, + "learning_rate": 4.64525204969205e-08, + "loss": 0.0, + "num_input_tokens_seen": 17012752, + "step": 34565 + }, + { + "epoch": 4.562491751352778, + "grad_norm": 5.901870463276282e-05, + "learning_rate": 4.631383971595226e-08, + "loss": 0.0, + "num_input_tokens_seen": 17015056, + "step": 34570 + }, + { + "epoch": 4.563151643130526, + "grad_norm": 8.729910769034177e-05, + "learning_rate": 4.617536134796529e-08, + "loss": 0.0007, + "num_input_tokens_seen": 17017424, + "step": 34575 + }, + { + "epoch": 4.563811534908275, + "grad_norm": 0.00011786862160079181, + "learning_rate": 4.6037085422351077e-08, + "loss": 0.0, + "num_input_tokens_seen": 17019984, + "step": 34580 + }, + { + "epoch": 4.564471426686024, + "grad_norm": 1.6084588423836976e-05, + "learning_rate": 4.5899011968457244e-08, + "loss": 0.0, + "num_input_tokens_seen": 17022864, + "step": 34585 + }, + { + "epoch": 4.565131318463772, + "grad_norm": 0.00034129302366636693, + "learning_rate": 4.576114101558914e-08, + "loss": 0.0, + "num_input_tokens_seen": 17025168, + "step": 34590 + }, + { + "epoch": 4.56579121024152, + "grad_norm": 0.00021121930330991745, + "learning_rate": 4.562347259300881e-08, + "loss": 0.0, + "num_input_tokens_seen": 17027728, + "step": 34595 + }, + { + "epoch": 4.566451102019268, + "grad_norm": 3.6314009776106104e-05, + "learning_rate": 4.54860067299353e-08, + "loss": 0.0308, + "num_input_tokens_seen": 17030096, + "step": 34600 + }, + { + "epoch": 4.567110993797018, + "grad_norm": 0.000650825328193605, + "learning_rate": 4.534874345554496e-08, + "loss": 0.0253, + "num_input_tokens_seen": 17032912, + "step": 34605 + }, + { + "epoch": 4.567770885574766, + "grad_norm": 0.00017748665413819253, + "learning_rate": 4.521168279897058e-08, + "loss": 0.0, + "num_input_tokens_seen": 17035600, + "step": 34610 + }, + { + "epoch": 4.568430777352514, + "grad_norm": 3.97105141018983e-05, + "learning_rate": 4.507482478930258e-08, + "loss": 0.087, + "num_input_tokens_seen": 17038096, + "step": 34615 + }, + { + "epoch": 4.569090669130262, + "grad_norm": 3.461187952780165e-05, + "learning_rate": 4.493816945558815e-08, + "loss": 0.0398, + "num_input_tokens_seen": 17040720, + "step": 34620 + }, + { + "epoch": 4.569750560908011, + "grad_norm": 0.009485268965363503, + "learning_rate": 4.480171682683098e-08, + "loss": 0.0105, + "num_input_tokens_seen": 17043280, + "step": 34625 + }, + { + "epoch": 4.57041045268576, + "grad_norm": 0.003441791282966733, + "learning_rate": 4.466546693199247e-08, + "loss": 0.0226, + "num_input_tokens_seen": 17045392, + "step": 34630 + }, + { + "epoch": 4.571070344463508, + "grad_norm": 0.0006981107871979475, + "learning_rate": 4.4529419799990695e-08, + "loss": 0.0, + "num_input_tokens_seen": 17047888, + "step": 34635 + }, + { + "epoch": 4.571730236241256, + "grad_norm": 5.575758041231893e-05, + "learning_rate": 4.439357545970068e-08, + "loss": 0.0016, + "num_input_tokens_seen": 17050320, + "step": 34640 + }, + { + "epoch": 4.572390128019005, + "grad_norm": 1.394921764585888e-05, + "learning_rate": 4.425793393995414e-08, + "loss": 0.0, + "num_input_tokens_seen": 17052944, + "step": 34645 + }, + { + "epoch": 4.573050019796753, + "grad_norm": 0.002040313323959708, + "learning_rate": 4.412249526954015e-08, + "loss": 0.0, + "num_input_tokens_seen": 17055248, + "step": 34650 + }, + { + "epoch": 4.573709911574502, + "grad_norm": 3.8957525248406455e-05, + "learning_rate": 4.398725947720483e-08, + "loss": 0.0, + "num_input_tokens_seen": 17057872, + "step": 34655 + }, + { + "epoch": 4.57436980335225, + "grad_norm": 3.296605427749455e-05, + "learning_rate": 4.385222659165067e-08, + "loss": 0.0, + "num_input_tokens_seen": 17060304, + "step": 34660 + }, + { + "epoch": 4.575029695129999, + "grad_norm": 1.76807698153425e-05, + "learning_rate": 4.3717396641537395e-08, + "loss": 0.0, + "num_input_tokens_seen": 17062928, + "step": 34665 + }, + { + "epoch": 4.575689586907747, + "grad_norm": 0.0012917850399389863, + "learning_rate": 4.358276965548202e-08, + "loss": 0.0, + "num_input_tokens_seen": 17065360, + "step": 34670 + }, + { + "epoch": 4.576349478685495, + "grad_norm": 0.18802160024642944, + "learning_rate": 4.344834566205802e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17067792, + "step": 34675 + }, + { + "epoch": 4.577009370463244, + "grad_norm": 0.00029112552874721587, + "learning_rate": 4.331412468979567e-08, + "loss": 0.0, + "num_input_tokens_seen": 17069776, + "step": 34680 + }, + { + "epoch": 4.577669262240993, + "grad_norm": 0.00016572128515690565, + "learning_rate": 4.318010676718254e-08, + "loss": 0.0, + "num_input_tokens_seen": 17072272, + "step": 34685 + }, + { + "epoch": 4.578329154018741, + "grad_norm": 3.444494359428063e-05, + "learning_rate": 4.304629192266318e-08, + "loss": 0.0, + "num_input_tokens_seen": 17074832, + "step": 34690 + }, + { + "epoch": 4.578989045796489, + "grad_norm": 0.04258698970079422, + "learning_rate": 4.2912680184638564e-08, + "loss": 0.0004, + "num_input_tokens_seen": 17077776, + "step": 34695 + }, + { + "epoch": 4.5796489375742375, + "grad_norm": 2.2253461793297902e-05, + "learning_rate": 4.277927158146688e-08, + "loss": 0.0657, + "num_input_tokens_seen": 17080336, + "step": 34700 + }, + { + "epoch": 4.580308829351987, + "grad_norm": 11.546168327331543, + "learning_rate": 4.264606614146327e-08, + "loss": 0.0066, + "num_input_tokens_seen": 17082576, + "step": 34705 + }, + { + "epoch": 4.580968721129735, + "grad_norm": 0.012570054270327091, + "learning_rate": 4.251306389289944e-08, + "loss": 0.0, + "num_input_tokens_seen": 17084880, + "step": 34710 + }, + { + "epoch": 4.581628612907483, + "grad_norm": 1.0254173503199127e-05, + "learning_rate": 4.2380264864004143e-08, + "loss": 0.0, + "num_input_tokens_seen": 17087440, + "step": 34715 + }, + { + "epoch": 4.5822885046852315, + "grad_norm": 0.0003984362119808793, + "learning_rate": 4.2247669082963065e-08, + "loss": 0.0, + "num_input_tokens_seen": 17089808, + "step": 34720 + }, + { + "epoch": 4.58294839646298, + "grad_norm": 0.00015969022933859378, + "learning_rate": 4.211527657791891e-08, + "loss": 0.0, + "num_input_tokens_seen": 17092048, + "step": 34725 + }, + { + "epoch": 4.583608288240729, + "grad_norm": 0.0001142570617957972, + "learning_rate": 4.198308737697087e-08, + "loss": 0.0, + "num_input_tokens_seen": 17094544, + "step": 34730 + }, + { + "epoch": 4.584268180018477, + "grad_norm": 1.6683903595549054e-05, + "learning_rate": 4.1851101508174834e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17096784, + "step": 34735 + }, + { + "epoch": 4.5849280717962255, + "grad_norm": 0.0009563491330482066, + "learning_rate": 4.171931899954439e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17099152, + "step": 34740 + }, + { + "epoch": 4.585587963573974, + "grad_norm": 5.6973076425492764e-05, + "learning_rate": 4.1587739879049067e-08, + "loss": 0.0, + "num_input_tokens_seen": 17101264, + "step": 34745 + }, + { + "epoch": 4.586247855351722, + "grad_norm": 0.00014773164002690464, + "learning_rate": 4.145636417461573e-08, + "loss": 0.0, + "num_input_tokens_seen": 17103504, + "step": 34750 + }, + { + "epoch": 4.586907747129471, + "grad_norm": 2.2371052182279527e-05, + "learning_rate": 4.132519191412787e-08, + "loss": 0.0, + "num_input_tokens_seen": 17105744, + "step": 34755 + }, + { + "epoch": 4.5875676389072195, + "grad_norm": 2.9566681405412965e-05, + "learning_rate": 4.1194223125425753e-08, + "loss": 0.0176, + "num_input_tokens_seen": 17108304, + "step": 34760 + }, + { + "epoch": 4.588227530684968, + "grad_norm": 2.690335168153979e-05, + "learning_rate": 4.1063457836306716e-08, + "loss": 0.0, + "num_input_tokens_seen": 17110544, + "step": 34765 + }, + { + "epoch": 4.588887422462716, + "grad_norm": 4.563686889014207e-05, + "learning_rate": 4.0932896074524546e-08, + "loss": 0.0, + "num_input_tokens_seen": 17113104, + "step": 34770 + }, + { + "epoch": 4.589547314240464, + "grad_norm": 0.001125271082855761, + "learning_rate": 4.080253786779042e-08, + "loss": 0.0226, + "num_input_tokens_seen": 17115472, + "step": 34775 + }, + { + "epoch": 4.590207206018213, + "grad_norm": 0.0025146508123725653, + "learning_rate": 4.0672383243771643e-08, + "loss": 0.0033, + "num_input_tokens_seen": 17118032, + "step": 34780 + }, + { + "epoch": 4.590867097795962, + "grad_norm": 1.4741677659912966e-05, + "learning_rate": 4.054243223009246e-08, + "loss": 0.0, + "num_input_tokens_seen": 17120592, + "step": 34785 + }, + { + "epoch": 4.59152698957371, + "grad_norm": 7.56392182665877e-05, + "learning_rate": 4.041268485433413e-08, + "loss": 0.0, + "num_input_tokens_seen": 17122896, + "step": 34790 + }, + { + "epoch": 4.592186881351458, + "grad_norm": 2.3552544007543474e-05, + "learning_rate": 4.028314114403475e-08, + "loss": 0.0, + "num_input_tokens_seen": 17125456, + "step": 34795 + }, + { + "epoch": 4.592846773129207, + "grad_norm": 3.42881066899281e-05, + "learning_rate": 4.015380112668909e-08, + "loss": 0.0, + "num_input_tokens_seen": 17128016, + "step": 34800 + }, + { + "epoch": 4.593506664906955, + "grad_norm": 1.2655588761845138e-05, + "learning_rate": 4.002466482974831e-08, + "loss": 0.0, + "num_input_tokens_seen": 17130512, + "step": 34805 + }, + { + "epoch": 4.594166556684704, + "grad_norm": 5.4459964303532615e-05, + "learning_rate": 3.989573228062082e-08, + "loss": 0.0364, + "num_input_tokens_seen": 17132944, + "step": 34810 + }, + { + "epoch": 4.594826448462452, + "grad_norm": 0.0026006638072431087, + "learning_rate": 3.976700350667173e-08, + "loss": 0.0, + "num_input_tokens_seen": 17135440, + "step": 34815 + }, + { + "epoch": 4.595486340240201, + "grad_norm": 5.579711069003679e-05, + "learning_rate": 3.963847853522262e-08, + "loss": 0.0, + "num_input_tokens_seen": 17137872, + "step": 34820 + }, + { + "epoch": 4.596146232017949, + "grad_norm": 0.01866592839360237, + "learning_rate": 3.951015739355201e-08, + "loss": 0.0, + "num_input_tokens_seen": 17140176, + "step": 34825 + }, + { + "epoch": 4.596806123795697, + "grad_norm": 0.004795776214450598, + "learning_rate": 3.9382040108895344e-08, + "loss": 0.0, + "num_input_tokens_seen": 17142672, + "step": 34830 + }, + { + "epoch": 4.5974660155734455, + "grad_norm": 0.04255275800824165, + "learning_rate": 3.925412670844419e-08, + "loss": 0.0004, + "num_input_tokens_seen": 17145232, + "step": 34835 + }, + { + "epoch": 4.598125907351195, + "grad_norm": 0.002885986352339387, + "learning_rate": 3.9126417219347506e-08, + "loss": 0.0, + "num_input_tokens_seen": 17147600, + "step": 34840 + }, + { + "epoch": 4.598785799128943, + "grad_norm": 0.0016264189034700394, + "learning_rate": 3.899891166871072e-08, + "loss": 0.0, + "num_input_tokens_seen": 17150032, + "step": 34845 + }, + { + "epoch": 4.599445690906691, + "grad_norm": 6.804332952015102e-05, + "learning_rate": 3.8871610083595965e-08, + "loss": 0.0003, + "num_input_tokens_seen": 17152400, + "step": 34850 + }, + { + "epoch": 4.6001055826844395, + "grad_norm": 0.009856065735220909, + "learning_rate": 3.874451249102195e-08, + "loss": 0.0214, + "num_input_tokens_seen": 17154896, + "step": 34855 + }, + { + "epoch": 4.600765474462188, + "grad_norm": 2.9393810109468177e-05, + "learning_rate": 3.861761891796433e-08, + "loss": 0.0002, + "num_input_tokens_seen": 17157264, + "step": 34860 + }, + { + "epoch": 4.601425366239937, + "grad_norm": 2.4371995095862076e-05, + "learning_rate": 3.8490929391355345e-08, + "loss": 0.0, + "num_input_tokens_seen": 17159632, + "step": 34865 + }, + { + "epoch": 4.602085258017685, + "grad_norm": 0.010247951373457909, + "learning_rate": 3.83644439380838e-08, + "loss": 0.0, + "num_input_tokens_seen": 17162000, + "step": 34870 + }, + { + "epoch": 4.6027451497954335, + "grad_norm": 2.818459688569419e-05, + "learning_rate": 3.823816258499546e-08, + "loss": 0.0, + "num_input_tokens_seen": 17164304, + "step": 34875 + }, + { + "epoch": 4.603405041573182, + "grad_norm": 0.0009323288686573505, + "learning_rate": 3.811208535889265e-08, + "loss": 0.0105, + "num_input_tokens_seen": 17166992, + "step": 34880 + }, + { + "epoch": 4.60406493335093, + "grad_norm": 0.002070516115054488, + "learning_rate": 3.79862122865342e-08, + "loss": 0.0411, + "num_input_tokens_seen": 17169552, + "step": 34885 + }, + { + "epoch": 4.604724825128679, + "grad_norm": 0.001990356482565403, + "learning_rate": 3.786054339463596e-08, + "loss": 0.0, + "num_input_tokens_seen": 17172176, + "step": 34890 + }, + { + "epoch": 4.6053847169064275, + "grad_norm": 2.619278893689625e-05, + "learning_rate": 3.7735078709869804e-08, + "loss": 0.0, + "num_input_tokens_seen": 17174416, + "step": 34895 + }, + { + "epoch": 4.606044608684176, + "grad_norm": 1.5399427866213955e-05, + "learning_rate": 3.760981825886533e-08, + "loss": 0.0, + "num_input_tokens_seen": 17176720, + "step": 34900 + }, + { + "epoch": 4.606704500461924, + "grad_norm": 0.00026046272250823677, + "learning_rate": 3.748476206820783e-08, + "loss": 0.0239, + "num_input_tokens_seen": 17179216, + "step": 34905 + }, + { + "epoch": 4.607364392239672, + "grad_norm": 0.006970543414354324, + "learning_rate": 3.735991016443929e-08, + "loss": 0.0016, + "num_input_tokens_seen": 17181648, + "step": 34910 + }, + { + "epoch": 4.6080242840174215, + "grad_norm": 0.00010547209240030497, + "learning_rate": 3.723526257405929e-08, + "loss": 0.0337, + "num_input_tokens_seen": 17184272, + "step": 34915 + }, + { + "epoch": 4.60868417579517, + "grad_norm": 0.08839015662670135, + "learning_rate": 3.711081932352278e-08, + "loss": 0.0, + "num_input_tokens_seen": 17186704, + "step": 34920 + }, + { + "epoch": 4.609344067572918, + "grad_norm": 0.00013485303497873247, + "learning_rate": 3.698658043924241e-08, + "loss": 0.0005, + "num_input_tokens_seen": 17189200, + "step": 34925 + }, + { + "epoch": 4.610003959350666, + "grad_norm": 2.3718108423054218e-05, + "learning_rate": 3.686254594758653e-08, + "loss": 0.0, + "num_input_tokens_seen": 17191888, + "step": 34930 + }, + { + "epoch": 4.610663851128415, + "grad_norm": 9.570590918883681e-05, + "learning_rate": 3.673871587488076e-08, + "loss": 0.0, + "num_input_tokens_seen": 17194448, + "step": 34935 + }, + { + "epoch": 4.611323742906164, + "grad_norm": 5.9927140682702884e-05, + "learning_rate": 3.661509024740739e-08, + "loss": 0.028, + "num_input_tokens_seen": 17197136, + "step": 34940 + }, + { + "epoch": 4.611983634683912, + "grad_norm": 0.0012820486444979906, + "learning_rate": 3.6491669091404553e-08, + "loss": 0.0, + "num_input_tokens_seen": 17199504, + "step": 34945 + }, + { + "epoch": 4.61264352646166, + "grad_norm": 2.176354428229388e-05, + "learning_rate": 3.636845243306785e-08, + "loss": 0.0, + "num_input_tokens_seen": 17201808, + "step": 34950 + }, + { + "epoch": 4.613303418239409, + "grad_norm": 3.7333906220737845e-05, + "learning_rate": 3.624544029854914e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17204432, + "step": 34955 + }, + { + "epoch": 4.613963310017157, + "grad_norm": 1.2981083273189142e-05, + "learning_rate": 3.6122632713956766e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17206928, + "step": 34960 + }, + { + "epoch": 4.614623201794906, + "grad_norm": 0.10895369201898575, + "learning_rate": 3.600002970535565e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17209552, + "step": 34965 + }, + { + "epoch": 4.615283093572654, + "grad_norm": 0.0003632585285231471, + "learning_rate": 3.587763129876753e-08, + "loss": 0.0, + "num_input_tokens_seen": 17212048, + "step": 34970 + }, + { + "epoch": 4.615942985350403, + "grad_norm": 2.6129724574275315e-05, + "learning_rate": 3.575543752017063e-08, + "loss": 0.0, + "num_input_tokens_seen": 17214160, + "step": 34975 + }, + { + "epoch": 4.616602877128151, + "grad_norm": 0.03864043951034546, + "learning_rate": 3.563344839549942e-08, + "loss": 0.0003, + "num_input_tokens_seen": 17216656, + "step": 34980 + }, + { + "epoch": 4.617262768905899, + "grad_norm": 0.002652583410963416, + "learning_rate": 3.5511663950645534e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17219024, + "step": 34985 + }, + { + "epoch": 4.617922660683648, + "grad_norm": 0.0005656993598677218, + "learning_rate": 3.539008421145673e-08, + "loss": 0.0, + "num_input_tokens_seen": 17221648, + "step": 34990 + }, + { + "epoch": 4.618582552461397, + "grad_norm": 1.3721179129788652e-05, + "learning_rate": 3.526870920373726e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17223952, + "step": 34995 + }, + { + "epoch": 4.619242444239145, + "grad_norm": 0.00020168579067103565, + "learning_rate": 3.514753895324829e-08, + "loss": 0.0, + "num_input_tokens_seen": 17226448, + "step": 35000 + }, + { + "epoch": 4.619902336016893, + "grad_norm": 0.06954360753297806, + "learning_rate": 3.5026573485707253e-08, + "loss": 0.0, + "num_input_tokens_seen": 17228944, + "step": 35005 + }, + { + "epoch": 4.620562227794641, + "grad_norm": 4.667209577746689e-05, + "learning_rate": 3.4905812826788285e-08, + "loss": 0.0, + "num_input_tokens_seen": 17231376, + "step": 35010 + }, + { + "epoch": 4.621222119572391, + "grad_norm": 0.00416320376098156, + "learning_rate": 3.478525700212176e-08, + "loss": 0.0411, + "num_input_tokens_seen": 17234384, + "step": 35015 + }, + { + "epoch": 4.621882011350139, + "grad_norm": 5.151130608282983e-05, + "learning_rate": 3.4664906037294996e-08, + "loss": 0.004, + "num_input_tokens_seen": 17236816, + "step": 35020 + }, + { + "epoch": 4.622541903127887, + "grad_norm": 0.00011490224278531969, + "learning_rate": 3.4544759957851553e-08, + "loss": 0.0, + "num_input_tokens_seen": 17238992, + "step": 35025 + }, + { + "epoch": 4.623201794905635, + "grad_norm": 0.0010727453045547009, + "learning_rate": 3.4424818789291373e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17241616, + "step": 35030 + }, + { + "epoch": 4.623861686683384, + "grad_norm": 1.041501309373416e-05, + "learning_rate": 3.4305082557071316e-08, + "loss": 0.0, + "num_input_tokens_seen": 17243792, + "step": 35035 + }, + { + "epoch": 4.624521578461132, + "grad_norm": 0.00030621461337432265, + "learning_rate": 3.418555128660461e-08, + "loss": 0.0, + "num_input_tokens_seen": 17246288, + "step": 35040 + }, + { + "epoch": 4.625181470238881, + "grad_norm": 0.0003549474640749395, + "learning_rate": 3.406622500326062e-08, + "loss": 0.0, + "num_input_tokens_seen": 17249040, + "step": 35045 + }, + { + "epoch": 4.625841362016629, + "grad_norm": 1.2623581824300345e-05, + "learning_rate": 3.3947103732365646e-08, + "loss": 0.0018, + "num_input_tokens_seen": 17251664, + "step": 35050 + }, + { + "epoch": 4.626501253794378, + "grad_norm": 0.00046227945131249726, + "learning_rate": 3.382818749920224e-08, + "loss": 0.0, + "num_input_tokens_seen": 17254160, + "step": 35055 + }, + { + "epoch": 4.627161145572126, + "grad_norm": 0.015203205868601799, + "learning_rate": 3.370947632900978e-08, + "loss": 0.0239, + "num_input_tokens_seen": 17256784, + "step": 35060 + }, + { + "epoch": 4.627821037349874, + "grad_norm": 0.09579546004533768, + "learning_rate": 3.3590970246983654e-08, + "loss": 0.0, + "num_input_tokens_seen": 17259088, + "step": 35065 + }, + { + "epoch": 4.628480929127623, + "grad_norm": 4.486577381612733e-05, + "learning_rate": 3.3472669278275637e-08, + "loss": 0.0, + "num_input_tokens_seen": 17261648, + "step": 35070 + }, + { + "epoch": 4.629140820905372, + "grad_norm": 1.4926635230949614e-05, + "learning_rate": 3.3354573447994637e-08, + "loss": 0.0035, + "num_input_tokens_seen": 17264336, + "step": 35075 + }, + { + "epoch": 4.62980071268312, + "grad_norm": 1.2104676898161415e-05, + "learning_rate": 3.3236682781205616e-08, + "loss": 0.0, + "num_input_tokens_seen": 17266576, + "step": 35080 + }, + { + "epoch": 4.630460604460868, + "grad_norm": 3.344564538565464e-05, + "learning_rate": 3.311899730292989e-08, + "loss": 0.0, + "num_input_tokens_seen": 17268944, + "step": 35085 + }, + { + "epoch": 4.6311204962386165, + "grad_norm": 0.00019546352268662304, + "learning_rate": 3.3001517038145356e-08, + "loss": 0.0, + "num_input_tokens_seen": 17271376, + "step": 35090 + }, + { + "epoch": 4.631780388016365, + "grad_norm": 0.004190162289887667, + "learning_rate": 3.28842420117863e-08, + "loss": 0.0, + "num_input_tokens_seen": 17273808, + "step": 35095 + }, + { + "epoch": 4.632440279794114, + "grad_norm": 6.786596350139007e-05, + "learning_rate": 3.27671722487437e-08, + "loss": 0.0, + "num_input_tokens_seen": 17276112, + "step": 35100 + }, + { + "epoch": 4.633100171571862, + "grad_norm": 0.00013934404705651104, + "learning_rate": 3.265030777386446e-08, + "loss": 0.0005, + "num_input_tokens_seen": 17278480, + "step": 35105 + }, + { + "epoch": 4.6337600633496105, + "grad_norm": 0.000934273237362504, + "learning_rate": 3.2533648611952623e-08, + "loss": 0.0252, + "num_input_tokens_seen": 17281296, + "step": 35110 + }, + { + "epoch": 4.634419955127359, + "grad_norm": 0.0025720647536218166, + "learning_rate": 3.241719478776805e-08, + "loss": 0.0, + "num_input_tokens_seen": 17283984, + "step": 35115 + }, + { + "epoch": 4.635079846905107, + "grad_norm": 0.0044370610266923904, + "learning_rate": 3.230094632602698e-08, + "loss": 0.0, + "num_input_tokens_seen": 17286352, + "step": 35120 + }, + { + "epoch": 4.635739738682856, + "grad_norm": 0.002383069135248661, + "learning_rate": 3.218490325140266e-08, + "loss": 0.0, + "num_input_tokens_seen": 17289040, + "step": 35125 + }, + { + "epoch": 4.6363996304606045, + "grad_norm": 0.0003934859996661544, + "learning_rate": 3.206906558852418e-08, + "loss": 0.0, + "num_input_tokens_seen": 17291536, + "step": 35130 + }, + { + "epoch": 4.637059522238353, + "grad_norm": 0.00044940304360352457, + "learning_rate": 3.195343336197742e-08, + "loss": 0.0, + "num_input_tokens_seen": 17294160, + "step": 35135 + }, + { + "epoch": 4.637719414016101, + "grad_norm": 81.78995513916016, + "learning_rate": 3.183800659630431e-08, + "loss": 0.0755, + "num_input_tokens_seen": 17296912, + "step": 35140 + }, + { + "epoch": 4.638379305793849, + "grad_norm": 2.1038411432527937e-05, + "learning_rate": 3.1722785316003475e-08, + "loss": 0.0, + "num_input_tokens_seen": 17299344, + "step": 35145 + }, + { + "epoch": 4.6390391975715985, + "grad_norm": 0.0004084999964106828, + "learning_rate": 3.160776954552979e-08, + "loss": 0.0011, + "num_input_tokens_seen": 17301840, + "step": 35150 + }, + { + "epoch": 4.639699089349347, + "grad_norm": 0.00018177898891735822, + "learning_rate": 3.149295930929441e-08, + "loss": 0.0518, + "num_input_tokens_seen": 17304336, + "step": 35155 + }, + { + "epoch": 4.640358981127095, + "grad_norm": 1.1378585440979805e-05, + "learning_rate": 3.137835463166494e-08, + "loss": 0.0016, + "num_input_tokens_seen": 17306768, + "step": 35160 + }, + { + "epoch": 4.641018872904843, + "grad_norm": 0.0009956208523362875, + "learning_rate": 3.12639555369657e-08, + "loss": 0.0305, + "num_input_tokens_seen": 17308816, + "step": 35165 + }, + { + "epoch": 4.641678764682592, + "grad_norm": 0.0009633854497224092, + "learning_rate": 3.1149762049476724e-08, + "loss": 0.0226, + "num_input_tokens_seen": 17311696, + "step": 35170 + }, + { + "epoch": 4.642338656460341, + "grad_norm": 0.1642853021621704, + "learning_rate": 3.103577419343484e-08, + "loss": 0.0, + "num_input_tokens_seen": 17314000, + "step": 35175 + }, + { + "epoch": 4.642998548238089, + "grad_norm": 0.015242863446474075, + "learning_rate": 3.092199199303325e-08, + "loss": 0.0, + "num_input_tokens_seen": 17316368, + "step": 35180 + }, + { + "epoch": 4.643658440015837, + "grad_norm": 0.010791040025651455, + "learning_rate": 3.0808415472421413e-08, + "loss": 0.0, + "num_input_tokens_seen": 17318800, + "step": 35185 + }, + { + "epoch": 4.644318331793586, + "grad_norm": 0.05711999163031578, + "learning_rate": 3.069504465570505e-08, + "loss": 0.036, + "num_input_tokens_seen": 17321296, + "step": 35190 + }, + { + "epoch": 4.644978223571334, + "grad_norm": 1.9168721337337047e-05, + "learning_rate": 3.0581879566946243e-08, + "loss": 0.0, + "num_input_tokens_seen": 17323472, + "step": 35195 + }, + { + "epoch": 4.645638115349083, + "grad_norm": 2.594672878331039e-05, + "learning_rate": 3.046892023016356e-08, + "loss": 0.0, + "num_input_tokens_seen": 17325712, + "step": 35200 + }, + { + "epoch": 4.646298007126831, + "grad_norm": 9.095690620597452e-05, + "learning_rate": 3.035616666933183e-08, + "loss": 0.0, + "num_input_tokens_seen": 17328464, + "step": 35205 + }, + { + "epoch": 4.64695789890458, + "grad_norm": 1.3668033716385253e-05, + "learning_rate": 3.024361890838201e-08, + "loss": 0.0, + "num_input_tokens_seen": 17330960, + "step": 35210 + }, + { + "epoch": 4.647617790682328, + "grad_norm": 0.0003607422695495188, + "learning_rate": 3.013127697120166e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17333776, + "step": 35215 + }, + { + "epoch": 4.648277682460076, + "grad_norm": 3.8394380680983886e-05, + "learning_rate": 3.00191408816346e-08, + "loss": 0.0028, + "num_input_tokens_seen": 17336144, + "step": 35220 + }, + { + "epoch": 4.648937574237825, + "grad_norm": 3.6790715967072174e-05, + "learning_rate": 2.99072106634809e-08, + "loss": 0.0, + "num_input_tokens_seen": 17338640, + "step": 35225 + }, + { + "epoch": 4.649597466015574, + "grad_norm": 2.8719652618747205e-05, + "learning_rate": 2.9795486340496557e-08, + "loss": 0.0, + "num_input_tokens_seen": 17341136, + "step": 35230 + }, + { + "epoch": 4.650257357793322, + "grad_norm": 0.00010499545896891505, + "learning_rate": 2.968396793639494e-08, + "loss": 0.0, + "num_input_tokens_seen": 17343248, + "step": 35235 + }, + { + "epoch": 4.65091724957107, + "grad_norm": 1.3791161109111272e-05, + "learning_rate": 2.9572655474844555e-08, + "loss": 0.0, + "num_input_tokens_seen": 17345744, + "step": 35240 + }, + { + "epoch": 4.6515771413488185, + "grad_norm": 0.000668855500407517, + "learning_rate": 2.9461548979470507e-08, + "loss": 0.0, + "num_input_tokens_seen": 17348432, + "step": 35245 + }, + { + "epoch": 4.652237033126568, + "grad_norm": 1.8187585737905465e-05, + "learning_rate": 2.9350648473854933e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17350864, + "step": 35250 + }, + { + "epoch": 4.652896924904316, + "grad_norm": 2.9176559110055678e-05, + "learning_rate": 2.9239953981535116e-08, + "loss": 0.0, + "num_input_tokens_seen": 17353360, + "step": 35255 + }, + { + "epoch": 4.653556816682064, + "grad_norm": 7.256161916302517e-05, + "learning_rate": 2.9129465526005592e-08, + "loss": 0.0, + "num_input_tokens_seen": 17355664, + "step": 35260 + }, + { + "epoch": 4.6542167084598125, + "grad_norm": 0.0024205967783927917, + "learning_rate": 2.9019183130716386e-08, + "loss": 0.0, + "num_input_tokens_seen": 17358288, + "step": 35265 + }, + { + "epoch": 4.654876600237561, + "grad_norm": 1.6926347598200664e-05, + "learning_rate": 2.8909106819074214e-08, + "loss": 0.0032, + "num_input_tokens_seen": 17360400, + "step": 35270 + }, + { + "epoch": 4.65553649201531, + "grad_norm": 0.003134387545287609, + "learning_rate": 2.8799236614442168e-08, + "loss": 0.0, + "num_input_tokens_seen": 17363216, + "step": 35275 + }, + { + "epoch": 4.656196383793058, + "grad_norm": 1.5004871784185525e-05, + "learning_rate": 2.868957254013915e-08, + "loss": 0.0, + "num_input_tokens_seen": 17365584, + "step": 35280 + }, + { + "epoch": 4.6568562755708065, + "grad_norm": 0.00010448491229908541, + "learning_rate": 2.8580114619440655e-08, + "loss": 0.0, + "num_input_tokens_seen": 17367824, + "step": 35285 + }, + { + "epoch": 4.657516167348555, + "grad_norm": 0.0001897216570796445, + "learning_rate": 2.8470862875578427e-08, + "loss": 0.0, + "num_input_tokens_seen": 17370064, + "step": 35290 + }, + { + "epoch": 4.658176059126303, + "grad_norm": 0.035831667482852936, + "learning_rate": 2.836181733174037e-08, + "loss": 0.0, + "num_input_tokens_seen": 17372688, + "step": 35295 + }, + { + "epoch": 4.658835950904052, + "grad_norm": 7.262377766892314e-05, + "learning_rate": 2.8252978011070404e-08, + "loss": 0.0, + "num_input_tokens_seen": 17375312, + "step": 35300 + }, + { + "epoch": 4.6594958426818005, + "grad_norm": 0.0005857625510543585, + "learning_rate": 2.8144344936669062e-08, + "loss": 0.0, + "num_input_tokens_seen": 17377744, + "step": 35305 + }, + { + "epoch": 4.660155734459549, + "grad_norm": 0.00033626792719587684, + "learning_rate": 2.8035918131592895e-08, + "loss": 0.0, + "num_input_tokens_seen": 17380240, + "step": 35310 + }, + { + "epoch": 4.660815626237297, + "grad_norm": 11.807950019836426, + "learning_rate": 2.792769761885472e-08, + "loss": 0.0132, + "num_input_tokens_seen": 17382608, + "step": 35315 + }, + { + "epoch": 4.661475518015045, + "grad_norm": 4.965622792951763e-05, + "learning_rate": 2.781968342142349e-08, + "loss": 0.0, + "num_input_tokens_seen": 17385104, + "step": 35320 + }, + { + "epoch": 4.662135409792794, + "grad_norm": 1.8443208318785764e-05, + "learning_rate": 2.771187556222454e-08, + "loss": 0.0, + "num_input_tokens_seen": 17387664, + "step": 35325 + }, + { + "epoch": 4.662795301570543, + "grad_norm": 0.007886563427746296, + "learning_rate": 2.7604274064139123e-08, + "loss": 0.0011, + "num_input_tokens_seen": 17389712, + "step": 35330 + }, + { + "epoch": 4.663455193348291, + "grad_norm": 5.378757487051189e-05, + "learning_rate": 2.7496878950005077e-08, + "loss": 0.0, + "num_input_tokens_seen": 17392400, + "step": 35335 + }, + { + "epoch": 4.664115085126039, + "grad_norm": 1.7856054910225794e-05, + "learning_rate": 2.738969024261606e-08, + "loss": 0.0, + "num_input_tokens_seen": 17395088, + "step": 35340 + }, + { + "epoch": 4.664774976903788, + "grad_norm": 1.7758238755050115e-05, + "learning_rate": 2.7282707964722427e-08, + "loss": 0.0, + "num_input_tokens_seen": 17397520, + "step": 35345 + }, + { + "epoch": 4.665434868681536, + "grad_norm": 0.00024594739079475403, + "learning_rate": 2.7175932139030022e-08, + "loss": 0.0008, + "num_input_tokens_seen": 17399824, + "step": 35350 + }, + { + "epoch": 4.666094760459285, + "grad_norm": 0.004140197765082121, + "learning_rate": 2.7069362788201267e-08, + "loss": 0.0, + "num_input_tokens_seen": 17402384, + "step": 35355 + }, + { + "epoch": 4.666754652237033, + "grad_norm": 5.721020698547363, + "learning_rate": 2.6962999934855068e-08, + "loss": 0.0039, + "num_input_tokens_seen": 17404752, + "step": 35360 + }, + { + "epoch": 4.667414544014782, + "grad_norm": 0.004005917347967625, + "learning_rate": 2.6856843601565816e-08, + "loss": 0.0, + "num_input_tokens_seen": 17407184, + "step": 35365 + }, + { + "epoch": 4.66807443579253, + "grad_norm": 2.8405822376953438e-05, + "learning_rate": 2.6750893810864596e-08, + "loss": 0.0, + "num_input_tokens_seen": 17409680, + "step": 35370 + }, + { + "epoch": 4.668734327570278, + "grad_norm": 3.6111789086135104e-05, + "learning_rate": 2.6645150585238528e-08, + "loss": 0.0381, + "num_input_tokens_seen": 17412304, + "step": 35375 + }, + { + "epoch": 4.6693942193480265, + "grad_norm": 6.57942146062851e-05, + "learning_rate": 2.653961394713067e-08, + "loss": 0.0005, + "num_input_tokens_seen": 17414736, + "step": 35380 + }, + { + "epoch": 4.670054111125776, + "grad_norm": 0.3089575469493866, + "learning_rate": 2.6434283918940424e-08, + "loss": 0.0002, + "num_input_tokens_seen": 17417232, + "step": 35385 + }, + { + "epoch": 4.670714002903524, + "grad_norm": 9.907195091247559, + "learning_rate": 2.6329160523023587e-08, + "loss": 0.0294, + "num_input_tokens_seen": 17420240, + "step": 35390 + }, + { + "epoch": 4.671373894681272, + "grad_norm": 1.850312764872797e-05, + "learning_rate": 2.6224243781691636e-08, + "loss": 0.0, + "num_input_tokens_seen": 17422928, + "step": 35395 + }, + { + "epoch": 4.6720337864590205, + "grad_norm": 3.9662245399085805e-05, + "learning_rate": 2.6119533717212428e-08, + "loss": 0.0, + "num_input_tokens_seen": 17425424, + "step": 35400 + }, + { + "epoch": 4.672693678236769, + "grad_norm": 6.341608241200447e-05, + "learning_rate": 2.601503035180963e-08, + "loss": 0.0, + "num_input_tokens_seen": 17427920, + "step": 35405 + }, + { + "epoch": 4.673353570014518, + "grad_norm": 2.3568481992697343e-05, + "learning_rate": 2.5910733707663947e-08, + "loss": 0.0, + "num_input_tokens_seen": 17430416, + "step": 35410 + }, + { + "epoch": 4.674013461792266, + "grad_norm": 2.081350248772651e-05, + "learning_rate": 2.5806643806910998e-08, + "loss": 0.0, + "num_input_tokens_seen": 17432784, + "step": 35415 + }, + { + "epoch": 4.6746733535700145, + "grad_norm": 0.011610975489020348, + "learning_rate": 2.5702760671643455e-08, + "loss": 0.0, + "num_input_tokens_seen": 17434896, + "step": 35420 + }, + { + "epoch": 4.675333245347763, + "grad_norm": 3.9640330214751884e-05, + "learning_rate": 2.559908432390967e-08, + "loss": 0.0, + "num_input_tokens_seen": 17437200, + "step": 35425 + }, + { + "epoch": 4.675993137125511, + "grad_norm": 0.9224775433540344, + "learning_rate": 2.5495614785714047e-08, + "loss": 0.0219, + "num_input_tokens_seen": 17439504, + "step": 35430 + }, + { + "epoch": 4.67665302890326, + "grad_norm": 7.126452692318708e-05, + "learning_rate": 2.5392352079017576e-08, + "loss": 0.0, + "num_input_tokens_seen": 17442000, + "step": 35435 + }, + { + "epoch": 4.6773129206810085, + "grad_norm": 0.007979700341820717, + "learning_rate": 2.528929622573661e-08, + "loss": 0.0, + "num_input_tokens_seen": 17444240, + "step": 35440 + }, + { + "epoch": 4.677972812458757, + "grad_norm": 5.438230436993763e-05, + "learning_rate": 2.5186447247744436e-08, + "loss": 0.0, + "num_input_tokens_seen": 17446672, + "step": 35445 + }, + { + "epoch": 4.678632704236505, + "grad_norm": 0.00035698155988939106, + "learning_rate": 2.5083805166869698e-08, + "loss": 0.0, + "num_input_tokens_seen": 17449232, + "step": 35450 + }, + { + "epoch": 4.679292596014253, + "grad_norm": 35.28067398071289, + "learning_rate": 2.4981370004897527e-08, + "loss": 0.0657, + "num_input_tokens_seen": 17451920, + "step": 35455 + }, + { + "epoch": 4.6799524877920025, + "grad_norm": 0.00012678831990342587, + "learning_rate": 2.487914178356898e-08, + "loss": 0.0, + "num_input_tokens_seen": 17454224, + "step": 35460 + }, + { + "epoch": 4.680612379569751, + "grad_norm": 4.944609827362001e-05, + "learning_rate": 2.4777120524581364e-08, + "loss": 0.0, + "num_input_tokens_seen": 17456784, + "step": 35465 + }, + { + "epoch": 4.681272271347499, + "grad_norm": 0.014934533275663853, + "learning_rate": 2.4675306249587912e-08, + "loss": 0.0487, + "num_input_tokens_seen": 17459088, + "step": 35470 + }, + { + "epoch": 4.681932163125247, + "grad_norm": 0.0011983781587332487, + "learning_rate": 2.45736989801979e-08, + "loss": 0.0411, + "num_input_tokens_seen": 17461456, + "step": 35475 + }, + { + "epoch": 4.682592054902996, + "grad_norm": 0.00014378594642039388, + "learning_rate": 2.4472298737976848e-08, + "loss": 0.0, + "num_input_tokens_seen": 17463760, + "step": 35480 + }, + { + "epoch": 4.683251946680745, + "grad_norm": 1.3808754374622367e-05, + "learning_rate": 2.4371105544446323e-08, + "loss": 0.0595, + "num_input_tokens_seen": 17466128, + "step": 35485 + }, + { + "epoch": 4.683911838458493, + "grad_norm": 0.4533805549144745, + "learning_rate": 2.427011942108348e-08, + "loss": 0.0002, + "num_input_tokens_seen": 17468624, + "step": 35490 + }, + { + "epoch": 4.684571730236241, + "grad_norm": 2.5709761757752858e-05, + "learning_rate": 2.416934038932217e-08, + "loss": 0.0595, + "num_input_tokens_seen": 17470928, + "step": 35495 + }, + { + "epoch": 4.68523162201399, + "grad_norm": 0.003099799156188965, + "learning_rate": 2.406876847055206e-08, + "loss": 0.0095, + "num_input_tokens_seen": 17473552, + "step": 35500 + }, + { + "epoch": 4.685891513791738, + "grad_norm": 2.1243256924208254e-05, + "learning_rate": 2.396840368611852e-08, + "loss": 0.0, + "num_input_tokens_seen": 17475600, + "step": 35505 + }, + { + "epoch": 4.686551405569487, + "grad_norm": 2.592039163573645e-05, + "learning_rate": 2.3868246057323515e-08, + "loss": 0.001, + "num_input_tokens_seen": 17478224, + "step": 35510 + }, + { + "epoch": 4.687211297347235, + "grad_norm": 1.40487991302507e-05, + "learning_rate": 2.3768295605424703e-08, + "loss": 0.1067, + "num_input_tokens_seen": 17480656, + "step": 35515 + }, + { + "epoch": 4.687871189124984, + "grad_norm": 0.00012769679597113281, + "learning_rate": 2.3668552351635896e-08, + "loss": 0.0, + "num_input_tokens_seen": 17482960, + "step": 35520 + }, + { + "epoch": 4.688531080902732, + "grad_norm": 2.972482798213605e-05, + "learning_rate": 2.356901631712671e-08, + "loss": 0.0, + "num_input_tokens_seen": 17485648, + "step": 35525 + }, + { + "epoch": 4.68919097268048, + "grad_norm": 0.00017019780352711678, + "learning_rate": 2.346968752302303e-08, + "loss": 0.0518, + "num_input_tokens_seen": 17488208, + "step": 35530 + }, + { + "epoch": 4.689850864458229, + "grad_norm": 0.004232440609484911, + "learning_rate": 2.3370565990406877e-08, + "loss": 0.0, + "num_input_tokens_seen": 17490704, + "step": 35535 + }, + { + "epoch": 4.690510756235978, + "grad_norm": 2.0393299564602785e-05, + "learning_rate": 2.3271651740315755e-08, + "loss": 0.0, + "num_input_tokens_seen": 17493008, + "step": 35540 + }, + { + "epoch": 4.691170648013726, + "grad_norm": 0.00018886018369812518, + "learning_rate": 2.3172944793743653e-08, + "loss": 0.0188, + "num_input_tokens_seen": 17495376, + "step": 35545 + }, + { + "epoch": 4.691830539791474, + "grad_norm": 1.3998830581840593e-05, + "learning_rate": 2.3074445171640366e-08, + "loss": 0.0, + "num_input_tokens_seen": 17497616, + "step": 35550 + }, + { + "epoch": 4.692490431569222, + "grad_norm": 5.956808308837935e-05, + "learning_rate": 2.2976152894911838e-08, + "loss": 0.0252, + "num_input_tokens_seen": 17500368, + "step": 35555 + }, + { + "epoch": 4.693150323346972, + "grad_norm": 0.014102444052696228, + "learning_rate": 2.2878067984419825e-08, + "loss": 0.0261, + "num_input_tokens_seen": 17502736, + "step": 35560 + }, + { + "epoch": 4.69381021512472, + "grad_norm": 6.130715337349102e-05, + "learning_rate": 2.2780190460981896e-08, + "loss": 0.0, + "num_input_tokens_seen": 17505232, + "step": 35565 + }, + { + "epoch": 4.694470106902468, + "grad_norm": 0.00011063072452088818, + "learning_rate": 2.2682520345372325e-08, + "loss": 0.0, + "num_input_tokens_seen": 17507600, + "step": 35570 + }, + { + "epoch": 4.695129998680216, + "grad_norm": 0.04662204161286354, + "learning_rate": 2.258505765832064e-08, + "loss": 0.0003, + "num_input_tokens_seen": 17510032, + "step": 35575 + }, + { + "epoch": 4.695789890457965, + "grad_norm": 0.03584326431155205, + "learning_rate": 2.248780242051229e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17512848, + "step": 35580 + }, + { + "epoch": 4.696449782235713, + "grad_norm": 0.005998116452246904, + "learning_rate": 2.239075465258966e-08, + "loss": 0.0, + "num_input_tokens_seen": 17515344, + "step": 35585 + }, + { + "epoch": 4.697109674013462, + "grad_norm": 3.821205609710887e-05, + "learning_rate": 2.2293914375149824e-08, + "loss": 0.0, + "num_input_tokens_seen": 17517776, + "step": 35590 + }, + { + "epoch": 4.69776956579121, + "grad_norm": 1.1565753993636463e-05, + "learning_rate": 2.2197281608746787e-08, + "loss": 0.0, + "num_input_tokens_seen": 17520272, + "step": 35595 + }, + { + "epoch": 4.698429457568959, + "grad_norm": 3.2036539778346196e-05, + "learning_rate": 2.210085637388992e-08, + "loss": 0.0062, + "num_input_tokens_seen": 17522512, + "step": 35600 + }, + { + "epoch": 4.699089349346707, + "grad_norm": 1.2361926565063186e-05, + "learning_rate": 2.2004638691044962e-08, + "loss": 0.0, + "num_input_tokens_seen": 17525264, + "step": 35605 + }, + { + "epoch": 4.699749241124455, + "grad_norm": 0.0018424964509904385, + "learning_rate": 2.190862858063347e-08, + "loss": 0.0338, + "num_input_tokens_seen": 17527568, + "step": 35610 + }, + { + "epoch": 4.700409132902204, + "grad_norm": 5.0226128223584965e-05, + "learning_rate": 2.1812826063032584e-08, + "loss": 0.0, + "num_input_tokens_seen": 17530064, + "step": 35615 + }, + { + "epoch": 4.701069024679953, + "grad_norm": 2.2851856556371786e-05, + "learning_rate": 2.1717231158576045e-08, + "loss": 0.0, + "num_input_tokens_seen": 17532688, + "step": 35620 + }, + { + "epoch": 4.701728916457701, + "grad_norm": 6.59624784020707e-05, + "learning_rate": 2.1621843887552948e-08, + "loss": 0.0, + "num_input_tokens_seen": 17535120, + "step": 35625 + }, + { + "epoch": 4.702388808235449, + "grad_norm": 0.0010319275315850973, + "learning_rate": 2.1526664270208662e-08, + "loss": 0.0, + "num_input_tokens_seen": 17537488, + "step": 35630 + }, + { + "epoch": 4.7030487000131975, + "grad_norm": 6.349383329506963e-05, + "learning_rate": 2.1431692326744244e-08, + "loss": 0.0, + "num_input_tokens_seen": 17540176, + "step": 35635 + }, + { + "epoch": 4.703708591790946, + "grad_norm": 0.00023460011288989335, + "learning_rate": 2.1336928077317017e-08, + "loss": 0.0, + "num_input_tokens_seen": 17542672, + "step": 35640 + }, + { + "epoch": 4.704368483568695, + "grad_norm": 2.8320706405793317e-05, + "learning_rate": 2.1242371542039893e-08, + "loss": 0.0016, + "num_input_tokens_seen": 17544848, + "step": 35645 + }, + { + "epoch": 4.705028375346443, + "grad_norm": 0.1557927280664444, + "learning_rate": 2.1148022740981708e-08, + "loss": 0.0, + "num_input_tokens_seen": 17547344, + "step": 35650 + }, + { + "epoch": 4.7056882671241915, + "grad_norm": 0.00012379908002912998, + "learning_rate": 2.1053881694167442e-08, + "loss": 0.0, + "num_input_tokens_seen": 17549968, + "step": 35655 + }, + { + "epoch": 4.70634815890194, + "grad_norm": 0.0013328184140846133, + "learning_rate": 2.095994842157789e-08, + "loss": 0.02, + "num_input_tokens_seen": 17552272, + "step": 35660 + }, + { + "epoch": 4.707008050679688, + "grad_norm": 2.367728120589163e-05, + "learning_rate": 2.086622294314955e-08, + "loss": 0.0002, + "num_input_tokens_seen": 17554768, + "step": 35665 + }, + { + "epoch": 4.707667942457437, + "grad_norm": 5.5861837608972564e-05, + "learning_rate": 2.077270527877495e-08, + "loss": 0.0, + "num_input_tokens_seen": 17557136, + "step": 35670 + }, + { + "epoch": 4.7083278342351855, + "grad_norm": 1.653864273976069e-05, + "learning_rate": 2.067939544830277e-08, + "loss": 0.0164, + "num_input_tokens_seen": 17559696, + "step": 35675 + }, + { + "epoch": 4.708987726012934, + "grad_norm": 6.894586113048717e-05, + "learning_rate": 2.0586293471537287e-08, + "loss": 0.0, + "num_input_tokens_seen": 17562128, + "step": 35680 + }, + { + "epoch": 4.709647617790682, + "grad_norm": 0.18366917967796326, + "learning_rate": 2.0493399368238573e-08, + "loss": 0.0295, + "num_input_tokens_seen": 17565136, + "step": 35685 + }, + { + "epoch": 4.71030750956843, + "grad_norm": 0.019991910085082054, + "learning_rate": 2.0400713158122863e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17567952, + "step": 35690 + }, + { + "epoch": 4.7109674013461795, + "grad_norm": 0.004305573645979166, + "learning_rate": 2.0308234860862084e-08, + "loss": 0.0, + "num_input_tokens_seen": 17570256, + "step": 35695 + }, + { + "epoch": 4.711627293123928, + "grad_norm": 2.053894058917649e-05, + "learning_rate": 2.021596449608409e-08, + "loss": 0.0, + "num_input_tokens_seen": 17572624, + "step": 35700 + }, + { + "epoch": 4.712287184901676, + "grad_norm": 0.00030093066743575037, + "learning_rate": 2.0123902083372557e-08, + "loss": 0.0, + "num_input_tokens_seen": 17574800, + "step": 35705 + }, + { + "epoch": 4.712947076679424, + "grad_norm": 0.00013274639786686748, + "learning_rate": 2.003204764226718e-08, + "loss": 0.0766, + "num_input_tokens_seen": 17577360, + "step": 35710 + }, + { + "epoch": 4.713606968457173, + "grad_norm": 5.4098480177344754e-05, + "learning_rate": 1.9940401192263146e-08, + "loss": 0.0, + "num_input_tokens_seen": 17579856, + "step": 35715 + }, + { + "epoch": 4.714266860234922, + "grad_norm": 8.203894685721025e-05, + "learning_rate": 1.9848962752812006e-08, + "loss": 0.0, + "num_input_tokens_seen": 17582224, + "step": 35720 + }, + { + "epoch": 4.71492675201267, + "grad_norm": 0.004259839653968811, + "learning_rate": 1.9757732343320898e-08, + "loss": 0.0, + "num_input_tokens_seen": 17584656, + "step": 35725 + }, + { + "epoch": 4.715586643790418, + "grad_norm": 0.001554732909426093, + "learning_rate": 1.9666709983152674e-08, + "loss": 0.0, + "num_input_tokens_seen": 17587152, + "step": 35730 + }, + { + "epoch": 4.716246535568167, + "grad_norm": 1.9275730664958246e-05, + "learning_rate": 1.957589569162632e-08, + "loss": 0.0, + "num_input_tokens_seen": 17589520, + "step": 35735 + }, + { + "epoch": 4.716906427345915, + "grad_norm": 15.459978103637695, + "learning_rate": 1.948528948801631e-08, + "loss": 0.0579, + "num_input_tokens_seen": 17591824, + "step": 35740 + }, + { + "epoch": 4.717566319123664, + "grad_norm": 0.0007542030070908368, + "learning_rate": 1.939489139155337e-08, + "loss": 0.0, + "num_input_tokens_seen": 17594512, + "step": 35745 + }, + { + "epoch": 4.718226210901412, + "grad_norm": 8.487552986480296e-05, + "learning_rate": 1.9304701421423707e-08, + "loss": 0.0, + "num_input_tokens_seen": 17596880, + "step": 35750 + }, + { + "epoch": 4.718886102679161, + "grad_norm": 2.4246677639894187e-05, + "learning_rate": 1.921471959676957e-08, + "loss": 0.0, + "num_input_tokens_seen": 17599504, + "step": 35755 + }, + { + "epoch": 4.719545994456909, + "grad_norm": 0.0041843983344733715, + "learning_rate": 1.9124945936688896e-08, + "loss": 0.02, + "num_input_tokens_seen": 17601872, + "step": 35760 + }, + { + "epoch": 4.720205886234657, + "grad_norm": 3.7647943827323616e-05, + "learning_rate": 1.903538046023545e-08, + "loss": 0.0016, + "num_input_tokens_seen": 17604560, + "step": 35765 + }, + { + "epoch": 4.720865778012406, + "grad_norm": 0.210673525929451, + "learning_rate": 1.8946023186419025e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17606736, + "step": 35770 + }, + { + "epoch": 4.721525669790155, + "grad_norm": 9.897825293592177e-06, + "learning_rate": 1.885687413420478e-08, + "loss": 0.0, + "num_input_tokens_seen": 17609360, + "step": 35775 + }, + { + "epoch": 4.722185561567903, + "grad_norm": 4.5366341510089114e-05, + "learning_rate": 1.876793332251425e-08, + "loss": 0.0, + "num_input_tokens_seen": 17611600, + "step": 35780 + }, + { + "epoch": 4.722845453345651, + "grad_norm": 0.00021711646695621312, + "learning_rate": 1.8679200770224445e-08, + "loss": 0.0, + "num_input_tokens_seen": 17614224, + "step": 35785 + }, + { + "epoch": 4.7235053451233995, + "grad_norm": 0.00011851973249576986, + "learning_rate": 1.859067649616797e-08, + "loss": 0.1016, + "num_input_tokens_seen": 17616656, + "step": 35790 + }, + { + "epoch": 4.724165236901149, + "grad_norm": 0.00015636181342415512, + "learning_rate": 1.8502360519133564e-08, + "loss": 0.0009, + "num_input_tokens_seen": 17619600, + "step": 35795 + }, + { + "epoch": 4.724825128678897, + "grad_norm": 0.00039138575084507465, + "learning_rate": 1.8414252857865688e-08, + "loss": 0.0, + "num_input_tokens_seen": 17622160, + "step": 35800 + }, + { + "epoch": 4.725485020456645, + "grad_norm": 2.3353479264187627e-05, + "learning_rate": 1.8326353531064708e-08, + "loss": 0.0, + "num_input_tokens_seen": 17624720, + "step": 35805 + }, + { + "epoch": 4.7261449122343935, + "grad_norm": 0.004620248917490244, + "learning_rate": 1.8238662557386262e-08, + "loss": 0.0, + "num_input_tokens_seen": 17627280, + "step": 35810 + }, + { + "epoch": 4.726804804012142, + "grad_norm": 1.8144639398087747e-05, + "learning_rate": 1.8151179955442463e-08, + "loss": 0.0, + "num_input_tokens_seen": 17630032, + "step": 35815 + }, + { + "epoch": 4.727464695789891, + "grad_norm": 1.72847921930952e-05, + "learning_rate": 1.806390574380079e-08, + "loss": 0.0337, + "num_input_tokens_seen": 17632720, + "step": 35820 + }, + { + "epoch": 4.728124587567639, + "grad_norm": 1.156251528300345e-05, + "learning_rate": 1.797683994098431e-08, + "loss": 0.0, + "num_input_tokens_seen": 17635280, + "step": 35825 + }, + { + "epoch": 4.7287844793453875, + "grad_norm": 1.7015587218338624e-05, + "learning_rate": 1.7889982565472473e-08, + "loss": 0.0, + "num_input_tokens_seen": 17637840, + "step": 35830 + }, + { + "epoch": 4.729444371123136, + "grad_norm": 0.00022032709966879338, + "learning_rate": 1.780333363569986e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17640208, + "step": 35835 + }, + { + "epoch": 4.730104262900884, + "grad_norm": 1.7302148989983834e-05, + "learning_rate": 1.77168931700572e-08, + "loss": 0.0, + "num_input_tokens_seen": 17642768, + "step": 35840 + }, + { + "epoch": 4.730764154678632, + "grad_norm": 5.2531137043843046e-05, + "learning_rate": 1.7630661186890827e-08, + "loss": 0.0647, + "num_input_tokens_seen": 17645136, + "step": 35845 + }, + { + "epoch": 4.7314240464563815, + "grad_norm": 3.242297316319309e-05, + "learning_rate": 1.7544637704502875e-08, + "loss": 0.0, + "num_input_tokens_seen": 17647504, + "step": 35850 + }, + { + "epoch": 4.73208393823413, + "grad_norm": 0.00018562580225989223, + "learning_rate": 1.745882274115118e-08, + "loss": 0.0, + "num_input_tokens_seen": 17649808, + "step": 35855 + }, + { + "epoch": 4.732743830011878, + "grad_norm": 1.8453007214702666e-05, + "learning_rate": 1.7373216315049288e-08, + "loss": 0.0, + "num_input_tokens_seen": 17652624, + "step": 35860 + }, + { + "epoch": 4.733403721789626, + "grad_norm": 6.539422611240298e-05, + "learning_rate": 1.7287818444366663e-08, + "loss": 0.0, + "num_input_tokens_seen": 17655248, + "step": 35865 + }, + { + "epoch": 4.734063613567375, + "grad_norm": 0.00016532238805666566, + "learning_rate": 1.7202629147228365e-08, + "loss": 0.0032, + "num_input_tokens_seen": 17657744, + "step": 35870 + }, + { + "epoch": 4.734723505345124, + "grad_norm": 1.4289161299529951e-05, + "learning_rate": 1.711764844171515e-08, + "loss": 0.0266, + "num_input_tokens_seen": 17660432, + "step": 35875 + }, + { + "epoch": 4.735383397122872, + "grad_norm": 0.0004984359256923199, + "learning_rate": 1.7032876345863588e-08, + "loss": 0.0023, + "num_input_tokens_seen": 17662736, + "step": 35880 + }, + { + "epoch": 4.73604328890062, + "grad_norm": 3.515151547617279e-05, + "learning_rate": 1.694831287766596e-08, + "loss": 0.0, + "num_input_tokens_seen": 17665040, + "step": 35885 + }, + { + "epoch": 4.736703180678369, + "grad_norm": 0.986121416091919, + "learning_rate": 1.6863958055070126e-08, + "loss": 0.0149, + "num_input_tokens_seen": 17667088, + "step": 35890 + }, + { + "epoch": 4.737363072456117, + "grad_norm": 1.1831551091745496e-05, + "learning_rate": 1.677981189597988e-08, + "loss": 0.0, + "num_input_tokens_seen": 17669456, + "step": 35895 + }, + { + "epoch": 4.738022964233865, + "grad_norm": 0.0015665609389543533, + "learning_rate": 1.6695874418254707e-08, + "loss": 0.0, + "num_input_tokens_seen": 17671760, + "step": 35900 + }, + { + "epoch": 4.738682856011614, + "grad_norm": 2.9511278626159765e-05, + "learning_rate": 1.6612145639709696e-08, + "loss": 0.0, + "num_input_tokens_seen": 17674320, + "step": 35905 + }, + { + "epoch": 4.739342747789363, + "grad_norm": 0.00012323328701313585, + "learning_rate": 1.652862557811563e-08, + "loss": 0.0239, + "num_input_tokens_seen": 17676688, + "step": 35910 + }, + { + "epoch": 4.740002639567111, + "grad_norm": 2.5474702852079645e-05, + "learning_rate": 1.6445314251198884e-08, + "loss": 0.0, + "num_input_tokens_seen": 17678800, + "step": 35915 + }, + { + "epoch": 4.740662531344859, + "grad_norm": 0.00870624277740717, + "learning_rate": 1.636221167664209e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17681296, + "step": 35920 + }, + { + "epoch": 4.741322423122607, + "grad_norm": 3.422133158892393e-05, + "learning_rate": 1.6279317872082697e-08, + "loss": 0.0, + "num_input_tokens_seen": 17683728, + "step": 35925 + }, + { + "epoch": 4.741982314900357, + "grad_norm": 0.00010780996672110632, + "learning_rate": 1.6196632855114745e-08, + "loss": 0.0, + "num_input_tokens_seen": 17686096, + "step": 35930 + }, + { + "epoch": 4.742642206678105, + "grad_norm": 2.2824242478236556e-05, + "learning_rate": 1.611415664328708e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17688528, + "step": 35935 + }, + { + "epoch": 4.743302098455853, + "grad_norm": 2.1149289750610478e-05, + "learning_rate": 1.6031889254105148e-08, + "loss": 0.0, + "num_input_tokens_seen": 17691088, + "step": 35940 + }, + { + "epoch": 4.743961990233601, + "grad_norm": 0.00030069437343627214, + "learning_rate": 1.594983070502942e-08, + "loss": 0.0381, + "num_input_tokens_seen": 17693392, + "step": 35945 + }, + { + "epoch": 4.74462188201135, + "grad_norm": 1.4820947399130091e-05, + "learning_rate": 1.5867981013475974e-08, + "loss": 0.0, + "num_input_tokens_seen": 17695824, + "step": 35950 + }, + { + "epoch": 4.745281773789099, + "grad_norm": 6.589458644157276e-05, + "learning_rate": 1.5786340196817127e-08, + "loss": 0.0177, + "num_input_tokens_seen": 17698000, + "step": 35955 + }, + { + "epoch": 4.745941665566847, + "grad_norm": 0.0008548679179511964, + "learning_rate": 1.570490827238047e-08, + "loss": 0.0, + "num_input_tokens_seen": 17700176, + "step": 35960 + }, + { + "epoch": 4.746601557344595, + "grad_norm": 0.22142356634140015, + "learning_rate": 1.562368525744939e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17702864, + "step": 35965 + }, + { + "epoch": 4.747261449122344, + "grad_norm": 9.57998854573816e-05, + "learning_rate": 1.5542671169262667e-08, + "loss": 0.0032, + "num_input_tokens_seen": 17705296, + "step": 35970 + }, + { + "epoch": 4.747921340900092, + "grad_norm": 0.003186148591339588, + "learning_rate": 1.5461866025015202e-08, + "loss": 0.0, + "num_input_tokens_seen": 17707920, + "step": 35975 + }, + { + "epoch": 4.748581232677841, + "grad_norm": 0.0001743086177157238, + "learning_rate": 1.5381269841857282e-08, + "loss": 0.0, + "num_input_tokens_seen": 17710352, + "step": 35980 + }, + { + "epoch": 4.749241124455589, + "grad_norm": 8.552165672881529e-05, + "learning_rate": 1.5300882636894662e-08, + "loss": 0.0, + "num_input_tokens_seen": 17712464, + "step": 35985 + }, + { + "epoch": 4.749901016233338, + "grad_norm": 2.156283335352782e-05, + "learning_rate": 1.5220704427189145e-08, + "loss": 0.0426, + "num_input_tokens_seen": 17714832, + "step": 35990 + }, + { + "epoch": 4.750560908011086, + "grad_norm": 1.526574487797916e-05, + "learning_rate": 1.5140735229757893e-08, + "loss": 0.075, + "num_input_tokens_seen": 17717200, + "step": 35995 + }, + { + "epoch": 4.751220799788834, + "grad_norm": 2.099963057844434e-05, + "learning_rate": 1.5060975061573777e-08, + "loss": 0.0, + "num_input_tokens_seen": 17719440, + "step": 36000 + }, + { + "epoch": 4.751880691566583, + "grad_norm": 2.1203761207289062e-05, + "learning_rate": 1.4981423939565364e-08, + "loss": 0.0, + "num_input_tokens_seen": 17721744, + "step": 36005 + }, + { + "epoch": 4.751880691566583, + "eval_loss": 0.2836270332336426, + "eval_runtime": 7.7987, + "eval_samples_per_second": 863.6, + "eval_steps_per_second": 107.966, + "num_input_tokens_seen": 17721744, + "step": 36005 + }, + { + "epoch": 4.752540583344332, + "grad_norm": 0.03689395636320114, + "learning_rate": 1.49020818806167e-08, + "loss": 0.0, + "num_input_tokens_seen": 17724048, + "step": 36010 + }, + { + "epoch": 4.75320047512208, + "grad_norm": 0.0009968762751668692, + "learning_rate": 1.4822948901567767e-08, + "loss": 0.0, + "num_input_tokens_seen": 17726672, + "step": 36015 + }, + { + "epoch": 4.753860366899828, + "grad_norm": 0.00012214395974297076, + "learning_rate": 1.474402501921368e-08, + "loss": 0.006, + "num_input_tokens_seen": 17729168, + "step": 36020 + }, + { + "epoch": 4.7545202586775765, + "grad_norm": 3.551087502273731e-05, + "learning_rate": 1.4665310250305708e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17731664, + "step": 36025 + }, + { + "epoch": 4.755180150455326, + "grad_norm": 0.08151374757289886, + "learning_rate": 1.4586804611550484e-08, + "loss": 0.001, + "num_input_tokens_seen": 17734224, + "step": 36030 + }, + { + "epoch": 4.755840042233074, + "grad_norm": 0.041376352310180664, + "learning_rate": 1.4508508119610019e-08, + "loss": 0.0, + "num_input_tokens_seen": 17736656, + "step": 36035 + }, + { + "epoch": 4.756499934010822, + "grad_norm": 0.00023899480584077537, + "learning_rate": 1.4430420791102461e-08, + "loss": 0.0, + "num_input_tokens_seen": 17738832, + "step": 36040 + }, + { + "epoch": 4.7571598257885706, + "grad_norm": 0.0009312837501056492, + "learning_rate": 1.4352542642601106e-08, + "loss": 0.0, + "num_input_tokens_seen": 17741328, + "step": 36045 + }, + { + "epoch": 4.757819717566319, + "grad_norm": 0.00021485799516085535, + "learning_rate": 1.427487369063507e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17743952, + "step": 36050 + }, + { + "epoch": 4.758479609344068, + "grad_norm": 0.4940636456012726, + "learning_rate": 1.4197413951689052e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17746512, + "step": 36055 + }, + { + "epoch": 4.759139501121816, + "grad_norm": 0.0010852499399334192, + "learning_rate": 1.4120163442203237e-08, + "loss": 0.0, + "num_input_tokens_seen": 17749072, + "step": 36060 + }, + { + "epoch": 4.7597993928995646, + "grad_norm": 8.565572352381423e-05, + "learning_rate": 1.404312217857373e-08, + "loss": 0.0, + "num_input_tokens_seen": 17752080, + "step": 36065 + }, + { + "epoch": 4.760459284677313, + "grad_norm": 0.003699306631460786, + "learning_rate": 1.3966290177151674e-08, + "loss": 0.0, + "num_input_tokens_seen": 17754704, + "step": 36070 + }, + { + "epoch": 4.761119176455061, + "grad_norm": 3.3029118640115485e-05, + "learning_rate": 1.3889667454244136e-08, + "loss": 0.0, + "num_input_tokens_seen": 17757456, + "step": 36075 + }, + { + "epoch": 4.76177906823281, + "grad_norm": 0.0007963759708218277, + "learning_rate": 1.3813254026113997e-08, + "loss": 0.0, + "num_input_tokens_seen": 17760080, + "step": 36080 + }, + { + "epoch": 4.7624389600105586, + "grad_norm": 3.863330857711844e-05, + "learning_rate": 1.373704990897917e-08, + "loss": 0.0007, + "num_input_tokens_seen": 17762576, + "step": 36085 + }, + { + "epoch": 4.763098851788307, + "grad_norm": 0.0002565206668805331, + "learning_rate": 1.3661055119013608e-08, + "loss": 0.0, + "num_input_tokens_seen": 17764944, + "step": 36090 + }, + { + "epoch": 4.763758743566055, + "grad_norm": 0.001068632467649877, + "learning_rate": 1.3585269672346633e-08, + "loss": 0.0, + "num_input_tokens_seen": 17767440, + "step": 36095 + }, + { + "epoch": 4.764418635343803, + "grad_norm": 0.00014805530372541398, + "learning_rate": 1.3509693585063042e-08, + "loss": 0.0, + "num_input_tokens_seen": 17769872, + "step": 36100 + }, + { + "epoch": 4.7650785271215526, + "grad_norm": 5.604649049928412e-05, + "learning_rate": 1.3434326873203449e-08, + "loss": 0.0, + "num_input_tokens_seen": 17772240, + "step": 36105 + }, + { + "epoch": 4.765738418899301, + "grad_norm": 0.04028937220573425, + "learning_rate": 1.3359169552763727e-08, + "loss": 0.0, + "num_input_tokens_seen": 17774608, + "step": 36110 + }, + { + "epoch": 4.766398310677049, + "grad_norm": 2.779177884804085e-05, + "learning_rate": 1.328422163969567e-08, + "loss": 0.0, + "num_input_tokens_seen": 17776976, + "step": 36115 + }, + { + "epoch": 4.767058202454797, + "grad_norm": 3.3683489164104685e-05, + "learning_rate": 1.320948314990633e-08, + "loss": 0.0, + "num_input_tokens_seen": 17779280, + "step": 36120 + }, + { + "epoch": 4.767718094232546, + "grad_norm": 0.0006951736286282539, + "learning_rate": 1.3134954099258466e-08, + "loss": 0.0, + "num_input_tokens_seen": 17781712, + "step": 36125 + }, + { + "epoch": 4.768377986010294, + "grad_norm": 1.4812599147262517e-05, + "learning_rate": 1.306063450357009e-08, + "loss": 0.0, + "num_input_tokens_seen": 17784208, + "step": 36130 + }, + { + "epoch": 4.769037877788043, + "grad_norm": 2.1049470888101496e-05, + "learning_rate": 1.298652437861536e-08, + "loss": 0.0, + "num_input_tokens_seen": 17786320, + "step": 36135 + }, + { + "epoch": 4.769697769565791, + "grad_norm": 1.1275597898929846e-05, + "learning_rate": 1.2912623740123362e-08, + "loss": 0.0, + "num_input_tokens_seen": 17788688, + "step": 36140 + }, + { + "epoch": 4.77035766134354, + "grad_norm": 1.6923049770412035e-05, + "learning_rate": 1.2838932603779107e-08, + "loss": 0.0, + "num_input_tokens_seen": 17791312, + "step": 36145 + }, + { + "epoch": 4.771017553121288, + "grad_norm": 0.00019921225612051785, + "learning_rate": 1.2765450985222859e-08, + "loss": 0.0, + "num_input_tokens_seen": 17793872, + "step": 36150 + }, + { + "epoch": 4.771677444899036, + "grad_norm": 0.10752221941947937, + "learning_rate": 1.269217890005081e-08, + "loss": 0.0, + "num_input_tokens_seen": 17796432, + "step": 36155 + }, + { + "epoch": 4.772337336676785, + "grad_norm": 0.048550963401794434, + "learning_rate": 1.2619116363814075e-08, + "loss": 0.0, + "num_input_tokens_seen": 17798864, + "step": 36160 + }, + { + "epoch": 4.772997228454534, + "grad_norm": 2.1920983272138983e-05, + "learning_rate": 1.2546263392019917e-08, + "loss": 0.0, + "num_input_tokens_seen": 17801168, + "step": 36165 + }, + { + "epoch": 4.773657120232282, + "grad_norm": 0.00014910813479218632, + "learning_rate": 1.2473620000130858e-08, + "loss": 0.0, + "num_input_tokens_seen": 17803344, + "step": 36170 + }, + { + "epoch": 4.77431701201003, + "grad_norm": 1.8873417502618395e-05, + "learning_rate": 1.2401186203564784e-08, + "loss": 0.0, + "num_input_tokens_seen": 17805840, + "step": 36175 + }, + { + "epoch": 4.7749769037877785, + "grad_norm": 0.0004082721716258675, + "learning_rate": 1.2328962017695288e-08, + "loss": 0.0213, + "num_input_tokens_seen": 17808336, + "step": 36180 + }, + { + "epoch": 4.775636795565527, + "grad_norm": 2.1403398932307027e-05, + "learning_rate": 1.225694745785144e-08, + "loss": 0.0, + "num_input_tokens_seen": 17810960, + "step": 36185 + }, + { + "epoch": 4.776296687343276, + "grad_norm": 2.5181774617522024e-05, + "learning_rate": 1.2185142539317905e-08, + "loss": 0.0, + "num_input_tokens_seen": 17813328, + "step": 36190 + }, + { + "epoch": 4.776956579121024, + "grad_norm": 0.00021293084137141705, + "learning_rate": 1.21135472773346e-08, + "loss": 0.0, + "num_input_tokens_seen": 17815760, + "step": 36195 + }, + { + "epoch": 4.7776164708987725, + "grad_norm": 0.0020100793335586786, + "learning_rate": 1.2042161687097152e-08, + "loss": 0.0, + "num_input_tokens_seen": 17818384, + "step": 36200 + }, + { + "epoch": 4.778276362676521, + "grad_norm": 0.0014156547840684652, + "learning_rate": 1.197098578375677e-08, + "loss": 0.0, + "num_input_tokens_seen": 17820752, + "step": 36205 + }, + { + "epoch": 4.778936254454269, + "grad_norm": 1.781239188858308e-05, + "learning_rate": 1.1900019582419818e-08, + "loss": 0.0079, + "num_input_tokens_seen": 17823248, + "step": 36210 + }, + { + "epoch": 4.779596146232018, + "grad_norm": 0.00027068998315371573, + "learning_rate": 1.1829263098148357e-08, + "loss": 0.0, + "num_input_tokens_seen": 17825680, + "step": 36215 + }, + { + "epoch": 4.7802560380097665, + "grad_norm": 7.034857117105275e-05, + "learning_rate": 1.1758716345960263e-08, + "loss": 0.0, + "num_input_tokens_seen": 17828112, + "step": 36220 + }, + { + "epoch": 4.780915929787515, + "grad_norm": 1.102480473491596e-05, + "learning_rate": 1.1688379340828224e-08, + "loss": 0.0, + "num_input_tokens_seen": 17830544, + "step": 36225 + }, + { + "epoch": 4.781575821565263, + "grad_norm": 1.4273274246079382e-05, + "learning_rate": 1.1618252097680858e-08, + "loss": 0.0, + "num_input_tokens_seen": 17833168, + "step": 36230 + }, + { + "epoch": 4.782235713343011, + "grad_norm": 1.7388576452503912e-05, + "learning_rate": 1.1548334631402146e-08, + "loss": 0.0, + "num_input_tokens_seen": 17835536, + "step": 36235 + }, + { + "epoch": 4.7828956051207605, + "grad_norm": 0.0008372985175810754, + "learning_rate": 1.1478626956831771e-08, + "loss": 0.0, + "num_input_tokens_seen": 17837712, + "step": 36240 + }, + { + "epoch": 4.783555496898509, + "grad_norm": 6.483028118964285e-05, + "learning_rate": 1.1409129088764346e-08, + "loss": 0.0, + "num_input_tokens_seen": 17840528, + "step": 36245 + }, + { + "epoch": 4.784215388676257, + "grad_norm": 0.02836316078901291, + "learning_rate": 1.1339841041950516e-08, + "loss": 0.0, + "num_input_tokens_seen": 17842896, + "step": 36250 + }, + { + "epoch": 4.784875280454005, + "grad_norm": 0.0002625687629915774, + "learning_rate": 1.1270762831096182e-08, + "loss": 0.0, + "num_input_tokens_seen": 17845520, + "step": 36255 + }, + { + "epoch": 4.785535172231754, + "grad_norm": 1.4671531971544027e-05, + "learning_rate": 1.1201894470862504e-08, + "loss": 0.0, + "num_input_tokens_seen": 17848144, + "step": 36260 + }, + { + "epoch": 4.786195064009503, + "grad_norm": 5.6285505706910044e-05, + "learning_rate": 1.1133235975866572e-08, + "loss": 0.0, + "num_input_tokens_seen": 17850320, + "step": 36265 + }, + { + "epoch": 4.786854955787251, + "grad_norm": 2.7550644517759793e-05, + "learning_rate": 1.1064787360680282e-08, + "loss": 0.0, + "num_input_tokens_seen": 17852816, + "step": 36270 + }, + { + "epoch": 4.787514847564999, + "grad_norm": 0.000557609018869698, + "learning_rate": 1.0996548639831793e-08, + "loss": 0.0, + "num_input_tokens_seen": 17855248, + "step": 36275 + }, + { + "epoch": 4.788174739342748, + "grad_norm": 1.5351559341070242e-05, + "learning_rate": 1.0928519827803961e-08, + "loss": 0.0, + "num_input_tokens_seen": 17857616, + "step": 36280 + }, + { + "epoch": 4.788834631120496, + "grad_norm": 2.7812551707029343e-05, + "learning_rate": 1.086070093903535e-08, + "loss": 0.0, + "num_input_tokens_seen": 17860240, + "step": 36285 + }, + { + "epoch": 4.789494522898245, + "grad_norm": 0.004202886018902063, + "learning_rate": 1.0793091987920444e-08, + "loss": 0.0, + "num_input_tokens_seen": 17862800, + "step": 36290 + }, + { + "epoch": 4.790154414675993, + "grad_norm": 0.0016465377993881702, + "learning_rate": 1.0725692988808322e-08, + "loss": 0.0, + "num_input_tokens_seen": 17865168, + "step": 36295 + }, + { + "epoch": 4.790814306453742, + "grad_norm": 5.0558181101223454e-05, + "learning_rate": 1.0658503956004206e-08, + "loss": 0.0, + "num_input_tokens_seen": 17867664, + "step": 36300 + }, + { + "epoch": 4.79147419823149, + "grad_norm": 0.002537961583584547, + "learning_rate": 1.0591524903768245e-08, + "loss": 0.0, + "num_input_tokens_seen": 17870160, + "step": 36305 + }, + { + "epoch": 4.792134090009238, + "grad_norm": 4.224347503622994e-05, + "learning_rate": 1.0524755846316402e-08, + "loss": 0.0, + "num_input_tokens_seen": 17872656, + "step": 36310 + }, + { + "epoch": 4.792793981786987, + "grad_norm": 0.0007909387350082397, + "learning_rate": 1.0458196797820007e-08, + "loss": 0.0, + "num_input_tokens_seen": 17875280, + "step": 36315 + }, + { + "epoch": 4.793453873564736, + "grad_norm": 0.2758484482765198, + "learning_rate": 1.039184777240565e-08, + "loss": 0.0001, + "num_input_tokens_seen": 17877776, + "step": 36320 + }, + { + "epoch": 4.794113765342484, + "grad_norm": 0.0005735823069699109, + "learning_rate": 1.0325708784155396e-08, + "loss": 0.0, + "num_input_tokens_seen": 17879952, + "step": 36325 + }, + { + "epoch": 4.794773657120232, + "grad_norm": 2.0948098608641885e-05, + "learning_rate": 1.0259779847106798e-08, + "loss": 0.0, + "num_input_tokens_seen": 17882512, + "step": 36330 + }, + { + "epoch": 4.7954335488979805, + "grad_norm": 2.039031642198097e-05, + "learning_rate": 1.0194060975252772e-08, + "loss": 0.0, + "num_input_tokens_seen": 17885072, + "step": 36335 + }, + { + "epoch": 4.79609344067573, + "grad_norm": 0.00025202587130479515, + "learning_rate": 1.0128552182541606e-08, + "loss": 0.0, + "num_input_tokens_seen": 17887568, + "step": 36340 + }, + { + "epoch": 4.796753332453478, + "grad_norm": 0.0006386773893609643, + "learning_rate": 1.0063253482877287e-08, + "loss": 0.0, + "num_input_tokens_seen": 17890192, + "step": 36345 + }, + { + "epoch": 4.797413224231226, + "grad_norm": 0.0011661931639537215, + "learning_rate": 9.998164890118844e-09, + "loss": 0.0005, + "num_input_tokens_seen": 17892880, + "step": 36350 + }, + { + "epoch": 4.7980731160089745, + "grad_norm": 0.0006924106855876744, + "learning_rate": 9.933286418080778e-09, + "loss": 0.0, + "num_input_tokens_seen": 17895376, + "step": 36355 + }, + { + "epoch": 4.798733007786723, + "grad_norm": 8.853508916217834e-05, + "learning_rate": 9.868618080533298e-09, + "loss": 0.0012, + "num_input_tokens_seen": 17897680, + "step": 36360 + }, + { + "epoch": 4.799392899564472, + "grad_norm": 1.4001774616190232e-05, + "learning_rate": 9.804159891201536e-09, + "loss": 0.0011, + "num_input_tokens_seen": 17900368, + "step": 36365 + }, + { + "epoch": 4.80005279134222, + "grad_norm": 0.0002682250633370131, + "learning_rate": 9.739911863766548e-09, + "loss": 0.0, + "num_input_tokens_seen": 17902928, + "step": 36370 + }, + { + "epoch": 4.8007126831199685, + "grad_norm": 0.00022198254009708762, + "learning_rate": 9.675874011864205e-09, + "loss": 0.0, + "num_input_tokens_seen": 17905488, + "step": 36375 + }, + { + "epoch": 4.801372574897717, + "grad_norm": 2.1480387658812106e-05, + "learning_rate": 9.612046349086411e-09, + "loss": 0.0, + "num_input_tokens_seen": 17907920, + "step": 36380 + }, + { + "epoch": 4.802032466675465, + "grad_norm": 2.2065241864765994e-05, + "learning_rate": 9.548428888979775e-09, + "loss": 0.0396, + "num_input_tokens_seen": 17910544, + "step": 36385 + }, + { + "epoch": 4.802692358453213, + "grad_norm": 20.490070343017578, + "learning_rate": 9.485021645046941e-09, + "loss": 0.0054, + "num_input_tokens_seen": 17912976, + "step": 36390 + }, + { + "epoch": 4.8033522502309625, + "grad_norm": 0.0011220432352274656, + "learning_rate": 9.421824630745478e-09, + "loss": 0.0, + "num_input_tokens_seen": 17915280, + "step": 36395 + }, + { + "epoch": 4.804012142008711, + "grad_norm": 1.5031295333756134e-05, + "learning_rate": 9.358837859488544e-09, + "loss": 0.0, + "num_input_tokens_seen": 17917648, + "step": 36400 + }, + { + "epoch": 4.804672033786459, + "grad_norm": 0.019448235630989075, + "learning_rate": 9.296061344644667e-09, + "loss": 0.0, + "num_input_tokens_seen": 17919952, + "step": 36405 + }, + { + "epoch": 4.805331925564207, + "grad_norm": 0.00023923815751913935, + "learning_rate": 9.233495099537525e-09, + "loss": 0.0035, + "num_input_tokens_seen": 17922512, + "step": 36410 + }, + { + "epoch": 4.805991817341956, + "grad_norm": 0.00014659958833362907, + "learning_rate": 9.171139137446605e-09, + "loss": 0.0005, + "num_input_tokens_seen": 17924944, + "step": 36415 + }, + { + "epoch": 4.806651709119705, + "grad_norm": 4.474867455428466e-05, + "learning_rate": 9.10899347160632e-09, + "loss": 0.0, + "num_input_tokens_seen": 17927312, + "step": 36420 + }, + { + "epoch": 4.807311600897453, + "grad_norm": 1.4712712982145604e-05, + "learning_rate": 9.047058115206674e-09, + "loss": 0.0, + "num_input_tokens_seen": 17929744, + "step": 36425 + }, + { + "epoch": 4.807971492675201, + "grad_norm": 5.0706272304523736e-05, + "learning_rate": 8.985333081393154e-09, + "loss": 0.028, + "num_input_tokens_seen": 17932112, + "step": 36430 + }, + { + "epoch": 4.80863138445295, + "grad_norm": 4.384171188576147e-05, + "learning_rate": 8.923818383266169e-09, + "loss": 0.0, + "num_input_tokens_seen": 17934480, + "step": 36435 + }, + { + "epoch": 4.809291276230698, + "grad_norm": 4.905409150524065e-05, + "learning_rate": 8.862514033882051e-09, + "loss": 0.0, + "num_input_tokens_seen": 17936912, + "step": 36440 + }, + { + "epoch": 4.809951168008446, + "grad_norm": 5.2517101721605286e-05, + "learning_rate": 8.80142004625195e-09, + "loss": 0.0, + "num_input_tokens_seen": 17939536, + "step": 36445 + }, + { + "epoch": 4.810611059786195, + "grad_norm": 1.8874017769121565e-05, + "learning_rate": 8.740536433342826e-09, + "loss": 0.0001, + "num_input_tokens_seen": 17941712, + "step": 36450 + }, + { + "epoch": 4.811270951563944, + "grad_norm": 7.149603334255517e-05, + "learning_rate": 8.679863208076787e-09, + "loss": 0.0, + "num_input_tokens_seen": 17944016, + "step": 36455 + }, + { + "epoch": 4.811930843341692, + "grad_norm": 9.245514775102492e-06, + "learning_rate": 8.619400383331088e-09, + "loss": 0.0, + "num_input_tokens_seen": 17946320, + "step": 36460 + }, + { + "epoch": 4.81259073511944, + "grad_norm": 4.676095340983011e-05, + "learning_rate": 8.559147971938574e-09, + "loss": 0.0, + "num_input_tokens_seen": 17948752, + "step": 36465 + }, + { + "epoch": 4.813250626897188, + "grad_norm": 4.1727682400960475e-05, + "learning_rate": 8.499105986687572e-09, + "loss": 0.0001, + "num_input_tokens_seen": 17951376, + "step": 36470 + }, + { + "epoch": 4.813910518674938, + "grad_norm": 4.7045174142112955e-05, + "learning_rate": 8.439274440321442e-09, + "loss": 0.0, + "num_input_tokens_seen": 17953616, + "step": 36475 + }, + { + "epoch": 4.814570410452686, + "grad_norm": 0.01597621850669384, + "learning_rate": 8.379653345538918e-09, + "loss": 0.0, + "num_input_tokens_seen": 17955792, + "step": 36480 + }, + { + "epoch": 4.815230302230434, + "grad_norm": 0.0008904458954930305, + "learning_rate": 8.320242714994319e-09, + "loss": 0.0, + "num_input_tokens_seen": 17958288, + "step": 36485 + }, + { + "epoch": 4.815890194008182, + "grad_norm": 0.15588656067848206, + "learning_rate": 8.261042561297004e-09, + "loss": 0.0001, + "num_input_tokens_seen": 17961104, + "step": 36490 + }, + { + "epoch": 4.816550085785931, + "grad_norm": 0.00011970204650424421, + "learning_rate": 8.202052897011702e-09, + "loss": 0.0, + "num_input_tokens_seen": 17963600, + "step": 36495 + }, + { + "epoch": 4.81720997756368, + "grad_norm": 6.248672434594482e-05, + "learning_rate": 8.143273734658729e-09, + "loss": 0.0, + "num_input_tokens_seen": 17966096, + "step": 36500 + }, + { + "epoch": 4.817869869341428, + "grad_norm": 2.0748017050209455e-05, + "learning_rate": 8.084705086713439e-09, + "loss": 0.0, + "num_input_tokens_seen": 17968592, + "step": 36505 + }, + { + "epoch": 4.818529761119176, + "grad_norm": 14.28984546661377, + "learning_rate": 8.026346965606556e-09, + "loss": 0.0412, + "num_input_tokens_seen": 17970832, + "step": 36510 + }, + { + "epoch": 4.819189652896925, + "grad_norm": 1.3816493265039753e-05, + "learning_rate": 7.968199383724283e-09, + "loss": 0.0, + "num_input_tokens_seen": 17973136, + "step": 36515 + }, + { + "epoch": 4.819849544674673, + "grad_norm": 0.040203265845775604, + "learning_rate": 7.91026235340786e-09, + "loss": 0.0, + "num_input_tokens_seen": 17975632, + "step": 36520 + }, + { + "epoch": 4.820509436452422, + "grad_norm": 0.0003583792713470757, + "learning_rate": 7.852535886954225e-09, + "loss": 0.0, + "num_input_tokens_seen": 17978128, + "step": 36525 + }, + { + "epoch": 4.82116932823017, + "grad_norm": 8.75997357070446e-05, + "learning_rate": 7.795019996615249e-09, + "loss": 0.0, + "num_input_tokens_seen": 17980752, + "step": 36530 + }, + { + "epoch": 4.821829220007919, + "grad_norm": 1.4387996088771615e-05, + "learning_rate": 7.737714694598274e-09, + "loss": 0.0, + "num_input_tokens_seen": 17983504, + "step": 36535 + }, + { + "epoch": 4.822489111785667, + "grad_norm": 1.6647998563712463e-05, + "learning_rate": 7.680619993065906e-09, + "loss": 0.0, + "num_input_tokens_seen": 17985872, + "step": 36540 + }, + { + "epoch": 4.823149003563415, + "grad_norm": 2.520790440030396e-05, + "learning_rate": 7.62373590413623e-09, + "loss": 0.0, + "num_input_tokens_seen": 17988560, + "step": 36545 + }, + { + "epoch": 4.823808895341164, + "grad_norm": 0.43590307235717773, + "learning_rate": 7.567062439882254e-09, + "loss": 0.0001, + "num_input_tokens_seen": 17990928, + "step": 36550 + }, + { + "epoch": 4.824468787118913, + "grad_norm": 0.0005357779446057975, + "learning_rate": 7.510599612332801e-09, + "loss": 0.0025, + "num_input_tokens_seen": 17993296, + "step": 36555 + }, + { + "epoch": 4.825128678896661, + "grad_norm": 2.613514516269788e-05, + "learning_rate": 7.454347433471397e-09, + "loss": 0.0, + "num_input_tokens_seen": 17995792, + "step": 36560 + }, + { + "epoch": 4.825788570674409, + "grad_norm": 6.79643708281219e-05, + "learning_rate": 7.398305915237379e-09, + "loss": 0.028, + "num_input_tokens_seen": 17998096, + "step": 36565 + }, + { + "epoch": 4.8264484624521575, + "grad_norm": 0.011779813095927238, + "learning_rate": 7.342475069525012e-09, + "loss": 0.0, + "num_input_tokens_seen": 18000528, + "step": 36570 + }, + { + "epoch": 4.827108354229907, + "grad_norm": 0.0013433409621939063, + "learning_rate": 7.2868549081841476e-09, + "loss": 0.0, + "num_input_tokens_seen": 18003024, + "step": 36575 + }, + { + "epoch": 4.827768246007655, + "grad_norm": 3.9421811379725114e-05, + "learning_rate": 7.2314454430195685e-09, + "loss": 0.0, + "num_input_tokens_seen": 18005712, + "step": 36580 + }, + { + "epoch": 4.828428137785403, + "grad_norm": 54.147377014160156, + "learning_rate": 7.176246685791754e-09, + "loss": 0.0915, + "num_input_tokens_seen": 18008144, + "step": 36585 + }, + { + "epoch": 4.8290880295631515, + "grad_norm": 0.00025381697923876345, + "learning_rate": 7.121258648216e-09, + "loss": 0.0239, + "num_input_tokens_seen": 18010256, + "step": 36590 + }, + { + "epoch": 4.8297479213409, + "grad_norm": 0.00021304019901435822, + "learning_rate": 7.066481341963304e-09, + "loss": 0.0, + "num_input_tokens_seen": 18012752, + "step": 36595 + }, + { + "epoch": 4.830407813118649, + "grad_norm": 1.7139234842034057e-05, + "learning_rate": 7.0119147786597e-09, + "loss": 0.0, + "num_input_tokens_seen": 18014992, + "step": 36600 + }, + { + "epoch": 4.831067704896397, + "grad_norm": 1.4912050573911984e-05, + "learning_rate": 6.957558969886368e-09, + "loss": 0.0004, + "num_input_tokens_seen": 18017552, + "step": 36605 + }, + { + "epoch": 4.8317275966741455, + "grad_norm": 6.469316576840356e-05, + "learning_rate": 6.9034139271803015e-09, + "loss": 0.0, + "num_input_tokens_seen": 18020048, + "step": 36610 + }, + { + "epoch": 4.832387488451894, + "grad_norm": 0.2807008922100067, + "learning_rate": 6.849479662033086e-09, + "loss": 0.0002, + "num_input_tokens_seen": 18022480, + "step": 36615 + }, + { + "epoch": 4.833047380229642, + "grad_norm": 0.0032580739352852106, + "learning_rate": 6.795756185891899e-09, + "loss": 0.0487, + "num_input_tokens_seen": 18024848, + "step": 36620 + }, + { + "epoch": 4.833707272007391, + "grad_norm": 0.00011517686652950943, + "learning_rate": 6.742243510159396e-09, + "loss": 0.0, + "num_input_tokens_seen": 18027152, + "step": 36625 + }, + { + "epoch": 4.8343671637851395, + "grad_norm": 2.705694168980699e-05, + "learning_rate": 6.688941646193047e-09, + "loss": 0.0, + "num_input_tokens_seen": 18029584, + "step": 36630 + }, + { + "epoch": 4.835027055562888, + "grad_norm": 0.013773814775049686, + "learning_rate": 6.635850605305804e-09, + "loss": 0.0, + "num_input_tokens_seen": 18031952, + "step": 36635 + }, + { + "epoch": 4.835686947340636, + "grad_norm": 0.022058840841054916, + "learning_rate": 6.582970398765986e-09, + "loss": 0.0, + "num_input_tokens_seen": 18034512, + "step": 36640 + }, + { + "epoch": 4.836346839118384, + "grad_norm": 4.3627336708595976e-05, + "learning_rate": 6.530301037796837e-09, + "loss": 0.0, + "num_input_tokens_seen": 18037200, + "step": 36645 + }, + { + "epoch": 4.8370067308961335, + "grad_norm": 5.2935080020688474e-05, + "learning_rate": 6.477842533577194e-09, + "loss": 0.0, + "num_input_tokens_seen": 18039952, + "step": 36650 + }, + { + "epoch": 4.837666622673882, + "grad_norm": 10.672889709472656, + "learning_rate": 6.4255948972409265e-09, + "loss": 0.0337, + "num_input_tokens_seen": 18042320, + "step": 36655 + }, + { + "epoch": 4.83832651445163, + "grad_norm": 0.00011545175220817327, + "learning_rate": 6.3735581398772775e-09, + "loss": 0.0, + "num_input_tokens_seen": 18044752, + "step": 36660 + }, + { + "epoch": 4.838986406229378, + "grad_norm": 6.284004484768957e-05, + "learning_rate": 6.321732272530633e-09, + "loss": 0.0, + "num_input_tokens_seen": 18047440, + "step": 36665 + }, + { + "epoch": 4.839646298007127, + "grad_norm": 0.00020805255917366594, + "learning_rate": 6.2701173062006396e-09, + "loss": 0.0502, + "num_input_tokens_seen": 18049680, + "step": 36670 + }, + { + "epoch": 4.840306189784875, + "grad_norm": 0.0001228711480507627, + "learning_rate": 6.2187132518422004e-09, + "loss": 0.0381, + "num_input_tokens_seen": 18052176, + "step": 36675 + }, + { + "epoch": 4.840966081562624, + "grad_norm": 0.0029427942354232073, + "learning_rate": 6.167520120365477e-09, + "loss": 0.0, + "num_input_tokens_seen": 18054992, + "step": 36680 + }, + { + "epoch": 4.841625973340372, + "grad_norm": 0.12953588366508484, + "learning_rate": 6.1165379226358895e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18057488, + "step": 36685 + }, + { + "epoch": 4.842285865118121, + "grad_norm": 4.0880309825297445e-05, + "learning_rate": 6.065766669474004e-09, + "loss": 0.0, + "num_input_tokens_seen": 18059984, + "step": 36690 + }, + { + "epoch": 4.842945756895869, + "grad_norm": 2.0065477656316943e-05, + "learning_rate": 6.015206371655535e-09, + "loss": 0.0, + "num_input_tokens_seen": 18062352, + "step": 36695 + }, + { + "epoch": 4.843605648673617, + "grad_norm": 6.099815436755307e-05, + "learning_rate": 5.964857039911786e-09, + "loss": 0.0, + "num_input_tokens_seen": 18064464, + "step": 36700 + }, + { + "epoch": 4.8442655404513655, + "grad_norm": 0.9859268665313721, + "learning_rate": 5.914718684928766e-09, + "loss": 0.001, + "num_input_tokens_seen": 18066896, + "step": 36705 + }, + { + "epoch": 4.844925432229115, + "grad_norm": 3.101829861407168e-05, + "learning_rate": 5.864791317348183e-09, + "loss": 0.0009, + "num_input_tokens_seen": 18069328, + "step": 36710 + }, + { + "epoch": 4.845585324006863, + "grad_norm": 1.0897661923081614e-05, + "learning_rate": 5.815074947766674e-09, + "loss": 0.0, + "num_input_tokens_seen": 18071888, + "step": 36715 + }, + { + "epoch": 4.846245215784611, + "grad_norm": 0.0002581208827905357, + "learning_rate": 5.76556958673613e-09, + "loss": 0.0, + "num_input_tokens_seen": 18074448, + "step": 36720 + }, + { + "epoch": 4.8469051075623595, + "grad_norm": 2.9608701879624277e-05, + "learning_rate": 5.716275244763813e-09, + "loss": 0.0, + "num_input_tokens_seen": 18077008, + "step": 36725 + }, + { + "epoch": 4.847564999340108, + "grad_norm": 0.0004536760679911822, + "learning_rate": 5.667191932312021e-09, + "loss": 0.0, + "num_input_tokens_seen": 18079952, + "step": 36730 + }, + { + "epoch": 4.848224891117857, + "grad_norm": 2.2530020942213014e-05, + "learning_rate": 5.61831965979831e-09, + "loss": 0.0, + "num_input_tokens_seen": 18082384, + "step": 36735 + }, + { + "epoch": 4.848884782895605, + "grad_norm": 0.0021661531645804644, + "learning_rate": 5.5696584375956036e-09, + "loss": 0.0, + "num_input_tokens_seen": 18084816, + "step": 36740 + }, + { + "epoch": 4.8495446746733535, + "grad_norm": 0.00015442879521287978, + "learning_rate": 5.5212082760316415e-09, + "loss": 0.0, + "num_input_tokens_seen": 18087120, + "step": 36745 + }, + { + "epoch": 4.850204566451102, + "grad_norm": 1.039473954733694e-05, + "learning_rate": 5.472969185389975e-09, + "loss": 0.0, + "num_input_tokens_seen": 18089744, + "step": 36750 + }, + { + "epoch": 4.85086445822885, + "grad_norm": 0.0004750068474095315, + "learning_rate": 5.424941175908637e-09, + "loss": 0.0, + "num_input_tokens_seen": 18092368, + "step": 36755 + }, + { + "epoch": 4.851524350006599, + "grad_norm": 0.000852764758747071, + "learning_rate": 5.377124257781473e-09, + "loss": 0.0, + "num_input_tokens_seen": 18094928, + "step": 36760 + }, + { + "epoch": 4.8521842417843475, + "grad_norm": 3.803055369644426e-05, + "learning_rate": 5.329518441157144e-09, + "loss": 0.0, + "num_input_tokens_seen": 18097040, + "step": 36765 + }, + { + "epoch": 4.852844133562096, + "grad_norm": 0.005006958730518818, + "learning_rate": 5.282123736139677e-09, + "loss": 0.0003, + "num_input_tokens_seen": 18099280, + "step": 36770 + }, + { + "epoch": 4.853504025339844, + "grad_norm": 1.367265394947026e-05, + "learning_rate": 5.234940152788358e-09, + "loss": 0.0, + "num_input_tokens_seen": 18101520, + "step": 36775 + }, + { + "epoch": 4.854163917117592, + "grad_norm": 0.00015550132957287133, + "learning_rate": 5.187967701117401e-09, + "loss": 0.0, + "num_input_tokens_seen": 18103632, + "step": 36780 + }, + { + "epoch": 4.8548238088953415, + "grad_norm": 2.042776941379998e-05, + "learning_rate": 5.141206391096387e-09, + "loss": 0.0, + "num_input_tokens_seen": 18106000, + "step": 36785 + }, + { + "epoch": 4.85548370067309, + "grad_norm": 0.0007538821664638817, + "learning_rate": 5.094656232650263e-09, + "loss": 0.0066, + "num_input_tokens_seen": 18108624, + "step": 36790 + }, + { + "epoch": 4.856143592450838, + "grad_norm": 9.818230319069698e-05, + "learning_rate": 5.0483172356586835e-09, + "loss": 0.0, + "num_input_tokens_seen": 18111120, + "step": 36795 + }, + { + "epoch": 4.856803484228586, + "grad_norm": 0.00048822807730175555, + "learning_rate": 5.002189409956892e-09, + "loss": 0.0, + "num_input_tokens_seen": 18113424, + "step": 36800 + }, + { + "epoch": 4.857463376006335, + "grad_norm": 0.013466020114719868, + "learning_rate": 4.956272765335278e-09, + "loss": 0.0, + "num_input_tokens_seen": 18115792, + "step": 36805 + }, + { + "epoch": 4.858123267784084, + "grad_norm": 4.1604354919400066e-05, + "learning_rate": 4.91056731153916e-09, + "loss": 0.0039, + "num_input_tokens_seen": 18118416, + "step": 36810 + }, + { + "epoch": 4.858783159561832, + "grad_norm": 0.00016768813657108694, + "learning_rate": 4.865073058269331e-09, + "loss": 0.0, + "num_input_tokens_seen": 18120464, + "step": 36815 + }, + { + "epoch": 4.85944305133958, + "grad_norm": 2.7609599783318117e-05, + "learning_rate": 4.819790015181513e-09, + "loss": 0.0, + "num_input_tokens_seen": 18122960, + "step": 36820 + }, + { + "epoch": 4.860102943117329, + "grad_norm": 2.8394108085194603e-05, + "learning_rate": 4.774718191886684e-09, + "loss": 0.0657, + "num_input_tokens_seen": 18125520, + "step": 36825 + }, + { + "epoch": 4.860762834895077, + "grad_norm": 5.970145866740495e-05, + "learning_rate": 4.729857597951081e-09, + "loss": 0.0, + "num_input_tokens_seen": 18128080, + "step": 36830 + }, + { + "epoch": 4.861422726672826, + "grad_norm": 0.001574253081344068, + "learning_rate": 4.685208242896088e-09, + "loss": 0.0, + "num_input_tokens_seen": 18130064, + "step": 36835 + }, + { + "epoch": 4.862082618450574, + "grad_norm": 2.4492735974490643e-05, + "learning_rate": 4.6407701361981246e-09, + "loss": 0.0, + "num_input_tokens_seen": 18132688, + "step": 36840 + }, + { + "epoch": 4.862742510228323, + "grad_norm": 7.663365977350622e-05, + "learning_rate": 4.5965432872888675e-09, + "loss": 0.001, + "num_input_tokens_seen": 18135248, + "step": 36845 + }, + { + "epoch": 4.863402402006071, + "grad_norm": 1.9701075871125795e-05, + "learning_rate": 4.552527705555032e-09, + "loss": 0.0, + "num_input_tokens_seen": 18137808, + "step": 36850 + }, + { + "epoch": 4.864062293783819, + "grad_norm": 4.3438776629045606e-05, + "learning_rate": 4.5087234003388094e-09, + "loss": 0.001, + "num_input_tokens_seen": 18139984, + "step": 36855 + }, + { + "epoch": 4.864722185561568, + "grad_norm": 0.00038492324529215693, + "learning_rate": 4.465130380937321e-09, + "loss": 0.0, + "num_input_tokens_seen": 18142288, + "step": 36860 + }, + { + "epoch": 4.865382077339317, + "grad_norm": 2.848833719326649e-05, + "learning_rate": 4.42174865660283e-09, + "loss": 0.0, + "num_input_tokens_seen": 18144912, + "step": 36865 + }, + { + "epoch": 4.866041969117065, + "grad_norm": 6.18175690760836e-05, + "learning_rate": 4.37857823654264e-09, + "loss": 0.0, + "num_input_tokens_seen": 18147664, + "step": 36870 + }, + { + "epoch": 4.866701860894813, + "grad_norm": 13.880156517028809, + "learning_rate": 4.335619129919643e-09, + "loss": 0.0061, + "num_input_tokens_seen": 18150160, + "step": 36875 + }, + { + "epoch": 4.867361752672561, + "grad_norm": 0.6847956776618958, + "learning_rate": 4.292871345851323e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18152656, + "step": 36880 + }, + { + "epoch": 4.868021644450311, + "grad_norm": 2.3505672288592905e-05, + "learning_rate": 4.250334893410867e-09, + "loss": 0.0, + "num_input_tokens_seen": 18155024, + "step": 36885 + }, + { + "epoch": 4.868681536228059, + "grad_norm": 0.00011453049228293821, + "learning_rate": 4.208009781626054e-09, + "loss": 0.0, + "num_input_tokens_seen": 18157584, + "step": 36890 + }, + { + "epoch": 4.869341428005807, + "grad_norm": 0.0018516669515520334, + "learning_rate": 4.165896019480253e-09, + "loss": 0.0133, + "num_input_tokens_seen": 18160080, + "step": 36895 + }, + { + "epoch": 4.870001319783555, + "grad_norm": 0.0003991833655163646, + "learning_rate": 4.123993615911759e-09, + "loss": 0.0, + "num_input_tokens_seen": 18162384, + "step": 36900 + }, + { + "epoch": 4.870661211561304, + "grad_norm": 0.0008322819485329092, + "learning_rate": 4.082302579814012e-09, + "loss": 0.0, + "num_input_tokens_seen": 18165008, + "step": 36905 + }, + { + "epoch": 4.871321103339053, + "grad_norm": 0.00972724612802267, + "learning_rate": 4.040822920035713e-09, + "loss": 0.0054, + "num_input_tokens_seen": 18167248, + "step": 36910 + }, + { + "epoch": 4.871980995116801, + "grad_norm": 0.001637786510400474, + "learning_rate": 3.999554645380487e-09, + "loss": 0.0, + "num_input_tokens_seen": 18169680, + "step": 36915 + }, + { + "epoch": 4.872640886894549, + "grad_norm": 0.0003935332060791552, + "learning_rate": 3.958497764607438e-09, + "loss": 0.0, + "num_input_tokens_seen": 18171920, + "step": 36920 + }, + { + "epoch": 4.873300778672298, + "grad_norm": 0.0012009014608338475, + "learning_rate": 3.917652286430484e-09, + "loss": 0.0, + "num_input_tokens_seen": 18174352, + "step": 36925 + }, + { + "epoch": 4.873960670450046, + "grad_norm": 0.15180867910385132, + "learning_rate": 3.87701821951869e-09, + "loss": 0.0549, + "num_input_tokens_seen": 18176848, + "step": 36930 + }, + { + "epoch": 4.874620562227794, + "grad_norm": 0.00013223606219980866, + "learning_rate": 3.836595572496493e-09, + "loss": 0.0, + "num_input_tokens_seen": 18179280, + "step": 36935 + }, + { + "epoch": 4.875280454005543, + "grad_norm": 3.8269990909611806e-05, + "learning_rate": 3.796384353943138e-09, + "loss": 0.0, + "num_input_tokens_seen": 18181776, + "step": 36940 + }, + { + "epoch": 4.875940345783292, + "grad_norm": 0.12301933020353317, + "learning_rate": 3.756384572393357e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18184528, + "step": 36945 + }, + { + "epoch": 4.87660023756104, + "grad_norm": 0.005153327248990536, + "learning_rate": 3.7165962363366888e-09, + "loss": 0.0, + "num_input_tokens_seen": 18187408, + "step": 36950 + }, + { + "epoch": 4.877260129338788, + "grad_norm": 5.5060783779481426e-05, + "learning_rate": 3.677019354217936e-09, + "loss": 0.0, + "num_input_tokens_seen": 18189968, + "step": 36955 + }, + { + "epoch": 4.8779200211165366, + "grad_norm": 2.651229988259729e-05, + "learning_rate": 3.637653934437046e-09, + "loss": 0.0, + "num_input_tokens_seen": 18192400, + "step": 36960 + }, + { + "epoch": 4.878579912894286, + "grad_norm": 1.6487107131979428e-05, + "learning_rate": 3.5984999853490017e-09, + "loss": 0.0, + "num_input_tokens_seen": 18195024, + "step": 36965 + }, + { + "epoch": 4.879239804672034, + "grad_norm": 0.002100097481161356, + "learning_rate": 3.5595575152639333e-09, + "loss": 0.0, + "num_input_tokens_seen": 18197456, + "step": 36970 + }, + { + "epoch": 4.879899696449782, + "grad_norm": 0.001038621529005468, + "learning_rate": 3.5208265324472297e-09, + "loss": 0.0, + "num_input_tokens_seen": 18200272, + "step": 36975 + }, + { + "epoch": 4.8805595882275306, + "grad_norm": 0.00012049706128891557, + "learning_rate": 3.4823070451190926e-09, + "loss": 0.0, + "num_input_tokens_seen": 18202960, + "step": 36980 + }, + { + "epoch": 4.881219480005279, + "grad_norm": 0.0003379734989721328, + "learning_rate": 3.443999061455094e-09, + "loss": 0.0, + "num_input_tokens_seen": 18205136, + "step": 36985 + }, + { + "epoch": 4.881879371783027, + "grad_norm": 0.008252648636698723, + "learning_rate": 3.4059025895857295e-09, + "loss": 0.0, + "num_input_tokens_seen": 18207376, + "step": 36990 + }, + { + "epoch": 4.882539263560776, + "grad_norm": 4.569829616229981e-05, + "learning_rate": 3.368017637596865e-09, + "loss": 0.0, + "num_input_tokens_seen": 18210000, + "step": 36995 + }, + { + "epoch": 4.8831991553385246, + "grad_norm": 0.0096468236297369, + "learning_rate": 3.330344213529179e-09, + "loss": 0.0, + "num_input_tokens_seen": 18212560, + "step": 37000 + }, + { + "epoch": 4.883859047116273, + "grad_norm": 2.2760867068427615e-05, + "learning_rate": 3.29288232537861e-09, + "loss": 0.0, + "num_input_tokens_seen": 18214800, + "step": 37005 + }, + { + "epoch": 4.884518938894021, + "grad_norm": 0.0077208224684000015, + "learning_rate": 3.2556319810961297e-09, + "loss": 0.0, + "num_input_tokens_seen": 18217168, + "step": 37010 + }, + { + "epoch": 4.885178830671769, + "grad_norm": 0.00016451945703011006, + "learning_rate": 3.21859318858797e-09, + "loss": 0.0, + "num_input_tokens_seen": 18219920, + "step": 37015 + }, + { + "epoch": 4.885838722449519, + "grad_norm": 0.00011153052764711902, + "learning_rate": 3.1817659557152876e-09, + "loss": 0.0, + "num_input_tokens_seen": 18222352, + "step": 37020 + }, + { + "epoch": 4.886498614227267, + "grad_norm": 1.4121486856311094e-05, + "learning_rate": 3.1451502902943848e-09, + "loss": 0.0, + "num_input_tokens_seen": 18224848, + "step": 37025 + }, + { + "epoch": 4.887158506005015, + "grad_norm": 108.1161880493164, + "learning_rate": 3.1087462000967124e-09, + "loss": 0.0782, + "num_input_tokens_seen": 18227280, + "step": 37030 + }, + { + "epoch": 4.887818397782763, + "grad_norm": 0.00013722782023251057, + "learning_rate": 3.0725536928486452e-09, + "loss": 0.001, + "num_input_tokens_seen": 18229712, + "step": 37035 + }, + { + "epoch": 4.888478289560512, + "grad_norm": 0.00022701297712046653, + "learning_rate": 3.036572776231927e-09, + "loss": 0.0, + "num_input_tokens_seen": 18232400, + "step": 37040 + }, + { + "epoch": 4.889138181338261, + "grad_norm": 0.00024063313321676105, + "learning_rate": 3.0008034578832274e-09, + "loss": 0.0213, + "num_input_tokens_seen": 18234896, + "step": 37045 + }, + { + "epoch": 4.889798073116009, + "grad_norm": 0.0005864155828021467, + "learning_rate": 2.9652457453942515e-09, + "loss": 0.0, + "num_input_tokens_seen": 18237392, + "step": 37050 + }, + { + "epoch": 4.890457964893757, + "grad_norm": 1.8028958947979845e-05, + "learning_rate": 2.9298996463119618e-09, + "loss": 0.0, + "num_input_tokens_seen": 18240080, + "step": 37055 + }, + { + "epoch": 4.891117856671506, + "grad_norm": 1.7613796444493346e-05, + "learning_rate": 2.894765168138247e-09, + "loss": 0.0, + "num_input_tokens_seen": 18242448, + "step": 37060 + }, + { + "epoch": 4.891777748449254, + "grad_norm": 3.3598429581616074e-05, + "learning_rate": 2.85984231833003e-09, + "loss": 0.0164, + "num_input_tokens_seen": 18244880, + "step": 37065 + }, + { + "epoch": 4.892437640227003, + "grad_norm": 19.759035110473633, + "learning_rate": 2.825131104299716e-09, + "loss": 0.0466, + "num_input_tokens_seen": 18247248, + "step": 37070 + }, + { + "epoch": 4.893097532004751, + "grad_norm": 0.006311311852186918, + "learning_rate": 2.7906315334143004e-09, + "loss": 0.0, + "num_input_tokens_seen": 18249680, + "step": 37075 + }, + { + "epoch": 4.8937574237825, + "grad_norm": 33.24702835083008, + "learning_rate": 2.756343612996148e-09, + "loss": 0.0072, + "num_input_tokens_seen": 18251920, + "step": 37080 + }, + { + "epoch": 4.894417315560248, + "grad_norm": 3.692014797707088e-05, + "learning_rate": 2.722267350322549e-09, + "loss": 0.0, + "num_input_tokens_seen": 18254608, + "step": 37085 + }, + { + "epoch": 4.895077207337996, + "grad_norm": 1.450990566809196e-05, + "learning_rate": 2.6884027526259403e-09, + "loss": 0.0, + "num_input_tokens_seen": 18257040, + "step": 37090 + }, + { + "epoch": 4.895737099115745, + "grad_norm": 2.8280426704441197e-05, + "learning_rate": 2.654749827093905e-09, + "loss": 0.0, + "num_input_tokens_seen": 18259472, + "step": 37095 + }, + { + "epoch": 4.896396990893494, + "grad_norm": 0.0005788062699139118, + "learning_rate": 2.6213085808691747e-09, + "loss": 0.0352, + "num_input_tokens_seen": 18261840, + "step": 37100 + }, + { + "epoch": 4.897056882671242, + "grad_norm": 2.147852319467347e-05, + "learning_rate": 2.588079021049072e-09, + "loss": 0.0, + "num_input_tokens_seen": 18264464, + "step": 37105 + }, + { + "epoch": 4.89771677444899, + "grad_norm": 0.00012815039372071624, + "learning_rate": 2.5550611546866217e-09, + "loss": 0.0, + "num_input_tokens_seen": 18266832, + "step": 37110 + }, + { + "epoch": 4.8983766662267385, + "grad_norm": 4.9210022552870214e-05, + "learning_rate": 2.5222549887893295e-09, + "loss": 0.0, + "num_input_tokens_seen": 18269264, + "step": 37115 + }, + { + "epoch": 4.899036558004488, + "grad_norm": 3.040397677978035e-05, + "learning_rate": 2.4896605303204034e-09, + "loss": 0.0, + "num_input_tokens_seen": 18271760, + "step": 37120 + }, + { + "epoch": 4.899696449782236, + "grad_norm": 1.7230806406587362e-05, + "learning_rate": 2.4572777861976425e-09, + "loss": 0.0, + "num_input_tokens_seen": 18274000, + "step": 37125 + }, + { + "epoch": 4.900356341559984, + "grad_norm": 0.00017458596266806126, + "learning_rate": 2.425106763293994e-09, + "loss": 0.0, + "num_input_tokens_seen": 18276368, + "step": 37130 + }, + { + "epoch": 4.9010162333377325, + "grad_norm": 0.021160904318094254, + "learning_rate": 2.393147468437551e-09, + "loss": 0.0, + "num_input_tokens_seen": 18278800, + "step": 37135 + }, + { + "epoch": 4.901676125115481, + "grad_norm": 0.0006504508783109486, + "learning_rate": 2.3613999084114434e-09, + "loss": 0.031, + "num_input_tokens_seen": 18281232, + "step": 37140 + }, + { + "epoch": 4.90233601689323, + "grad_norm": 2.090338966809213e-05, + "learning_rate": 2.329864089953837e-09, + "loss": 0.0, + "num_input_tokens_seen": 18283536, + "step": 37145 + }, + { + "epoch": 4.902995908670978, + "grad_norm": 4.0948219975689426e-05, + "learning_rate": 2.298540019758155e-09, + "loss": 0.0352, + "num_input_tokens_seen": 18285904, + "step": 37150 + }, + { + "epoch": 4.9036558004487265, + "grad_norm": 1.6493268049089238e-05, + "learning_rate": 2.2674277044724134e-09, + "loss": 0.0, + "num_input_tokens_seen": 18288336, + "step": 37155 + }, + { + "epoch": 4.904315692226475, + "grad_norm": 0.17416909337043762, + "learning_rate": 2.236527150700218e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18290832, + "step": 37160 + }, + { + "epoch": 4.904975584004223, + "grad_norm": 0.000696924515068531, + "learning_rate": 2.205838364999879e-09, + "loss": 0.0, + "num_input_tokens_seen": 18293264, + "step": 37165 + }, + { + "epoch": 4.905635475781972, + "grad_norm": 2.5053914214367978e-05, + "learning_rate": 2.1753613538849636e-09, + "loss": 0.0, + "num_input_tokens_seen": 18295632, + "step": 37170 + }, + { + "epoch": 4.9062953675597205, + "grad_norm": 0.068586066365242, + "learning_rate": 2.145096123823853e-09, + "loss": 0.0007, + "num_input_tokens_seen": 18298256, + "step": 37175 + }, + { + "epoch": 4.906955259337469, + "grad_norm": 5.68434115848504e-05, + "learning_rate": 2.1150426812401866e-09, + "loss": 0.0, + "num_input_tokens_seen": 18300304, + "step": 37180 + }, + { + "epoch": 4.907615151115217, + "grad_norm": 1.4777840988244861e-05, + "learning_rate": 2.0852010325125293e-09, + "loss": 0.0006, + "num_input_tokens_seen": 18302992, + "step": 37185 + }, + { + "epoch": 4.908275042892965, + "grad_norm": 0.0001635983062442392, + "learning_rate": 2.0555711839747026e-09, + "loss": 0.0, + "num_input_tokens_seen": 18305424, + "step": 37190 + }, + { + "epoch": 4.908934934670714, + "grad_norm": 1.687292751739733e-05, + "learning_rate": 2.0261531419153433e-09, + "loss": 0.0, + "num_input_tokens_seen": 18307664, + "step": 37195 + }, + { + "epoch": 4.909594826448463, + "grad_norm": 0.9634570479393005, + "learning_rate": 1.9969469125782346e-09, + "loss": 0.0004, + "num_input_tokens_seen": 18310288, + "step": 37200 + }, + { + "epoch": 4.910254718226211, + "grad_norm": 2.745349775068462e-05, + "learning_rate": 1.9679525021621955e-09, + "loss": 0.0, + "num_input_tokens_seen": 18312720, + "step": 37205 + }, + { + "epoch": 4.910914610003959, + "grad_norm": 1.8449822164257057e-05, + "learning_rate": 1.939169916820971e-09, + "loss": 0.0, + "num_input_tokens_seen": 18315536, + "step": 37210 + }, + { + "epoch": 4.911574501781708, + "grad_norm": 0.00042026498704217374, + "learning_rate": 1.910599162663673e-09, + "loss": 0.0, + "num_input_tokens_seen": 18317968, + "step": 37215 + }, + { + "epoch": 4.912234393559456, + "grad_norm": 0.01246301457285881, + "learning_rate": 1.8822402457540075e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18320528, + "step": 37220 + }, + { + "epoch": 4.912894285337205, + "grad_norm": 1.6955054888967425e-05, + "learning_rate": 1.8540931721110487e-09, + "loss": 0.0, + "num_input_tokens_seen": 18322960, + "step": 37225 + }, + { + "epoch": 4.913554177114953, + "grad_norm": 2.438508454360999e-05, + "learning_rate": 1.8261579477087951e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18325584, + "step": 37230 + }, + { + "epoch": 4.914214068892702, + "grad_norm": 2.5170236767735332e-05, + "learning_rate": 1.7984345784763932e-09, + "loss": 0.0, + "num_input_tokens_seen": 18328016, + "step": 37235 + }, + { + "epoch": 4.91487396067045, + "grad_norm": 2.092982322210446e-05, + "learning_rate": 1.770923070297803e-09, + "loss": 0.0, + "num_input_tokens_seen": 18330256, + "step": 37240 + }, + { + "epoch": 4.915533852448198, + "grad_norm": 0.019744206219911575, + "learning_rate": 1.743623429012131e-09, + "loss": 0.0533, + "num_input_tokens_seen": 18333072, + "step": 37245 + }, + { + "epoch": 4.9161937442259465, + "grad_norm": 7.678641122765839e-05, + "learning_rate": 1.7165356604136317e-09, + "loss": 0.0, + "num_input_tokens_seen": 18335440, + "step": 37250 + }, + { + "epoch": 4.916853636003696, + "grad_norm": 4.612092743627727e-05, + "learning_rate": 1.6896597702514837e-09, + "loss": 0.0, + "num_input_tokens_seen": 18337680, + "step": 37255 + }, + { + "epoch": 4.917513527781444, + "grad_norm": 5.102691173553467, + "learning_rate": 1.6629957642297908e-09, + "loss": 0.0028, + "num_input_tokens_seen": 18340496, + "step": 37260 + }, + { + "epoch": 4.918173419559192, + "grad_norm": 0.002844721544533968, + "learning_rate": 1.6365436480079153e-09, + "loss": 0.0, + "num_input_tokens_seen": 18342928, + "step": 37265 + }, + { + "epoch": 4.9188333113369405, + "grad_norm": 7.894792361184955e-05, + "learning_rate": 1.6103034272000326e-09, + "loss": 0.0, + "num_input_tokens_seen": 18345488, + "step": 37270 + }, + { + "epoch": 4.919493203114689, + "grad_norm": 2.4739310902077705e-05, + "learning_rate": 1.5842751073753546e-09, + "loss": 0.0, + "num_input_tokens_seen": 18348112, + "step": 37275 + }, + { + "epoch": 4.920153094892438, + "grad_norm": 6.59223078400828e-05, + "learning_rate": 1.5584586940584622e-09, + "loss": 0.0, + "num_input_tokens_seen": 18350416, + "step": 37280 + }, + { + "epoch": 4.920812986670186, + "grad_norm": 0.00019666607840918005, + "learning_rate": 1.5328541927286387e-09, + "loss": 0.028, + "num_input_tokens_seen": 18352912, + "step": 37285 + }, + { + "epoch": 4.9214728784479345, + "grad_norm": 5.4916919907554984e-05, + "learning_rate": 1.507461608819982e-09, + "loss": 0.0, + "num_input_tokens_seen": 18355536, + "step": 37290 + }, + { + "epoch": 4.922132770225683, + "grad_norm": 6.035666592651978e-05, + "learning_rate": 1.4822809477222919e-09, + "loss": 0.0, + "num_input_tokens_seen": 18357840, + "step": 37295 + }, + { + "epoch": 4.922792662003431, + "grad_norm": 3.3862557756947353e-05, + "learning_rate": 1.457312214779627e-09, + "loss": 0.0004, + "num_input_tokens_seen": 18360336, + "step": 37300 + }, + { + "epoch": 4.92345255378118, + "grad_norm": 0.0001764145854394883, + "learning_rate": 1.4325554152916364e-09, + "loss": 0.0, + "num_input_tokens_seen": 18363216, + "step": 37305 + }, + { + "epoch": 4.9241124455589285, + "grad_norm": 1.4086094779486302e-05, + "learning_rate": 1.408010554512673e-09, + "loss": 0.0, + "num_input_tokens_seen": 18365968, + "step": 37310 + }, + { + "epoch": 4.924772337336677, + "grad_norm": 2.1404019207693636e-05, + "learning_rate": 1.3836776376522364e-09, + "loss": 0.0, + "num_input_tokens_seen": 18368400, + "step": 37315 + }, + { + "epoch": 4.925432229114425, + "grad_norm": 1.2312765647948254e-05, + "learning_rate": 1.3595566698748617e-09, + "loss": 0.0, + "num_input_tokens_seen": 18370704, + "step": 37320 + }, + { + "epoch": 4.926092120892173, + "grad_norm": 5.9814472479047254e-05, + "learning_rate": 1.3356476562998986e-09, + "loss": 0.0, + "num_input_tokens_seen": 18373264, + "step": 37325 + }, + { + "epoch": 4.9267520126699225, + "grad_norm": 0.13493262231349945, + "learning_rate": 1.3119506020020653e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18375632, + "step": 37330 + }, + { + "epoch": 4.927411904447671, + "grad_norm": 0.0025102654471993446, + "learning_rate": 1.2884655120107835e-09, + "loss": 0.0226, + "num_input_tokens_seen": 18378192, + "step": 37335 + }, + { + "epoch": 4.928071796225419, + "grad_norm": 2.4892946385079995e-05, + "learning_rate": 1.26519239131051e-09, + "loss": 0.0, + "num_input_tokens_seen": 18380560, + "step": 37340 + }, + { + "epoch": 4.928731688003167, + "grad_norm": 0.0018648894038051367, + "learning_rate": 1.2421312448408494e-09, + "loss": 0.0, + "num_input_tokens_seen": 18383248, + "step": 37345 + }, + { + "epoch": 4.929391579780916, + "grad_norm": 1.4500590562820435, + "learning_rate": 1.2192820774965529e-09, + "loss": 0.0014, + "num_input_tokens_seen": 18385616, + "step": 37350 + }, + { + "epoch": 4.930051471558665, + "grad_norm": 0.009046858176589012, + "learning_rate": 1.1966448941269635e-09, + "loss": 0.0, + "num_input_tokens_seen": 18387728, + "step": 37355 + }, + { + "epoch": 4.930711363336413, + "grad_norm": 5.444921043817885e-05, + "learning_rate": 1.1742196995366827e-09, + "loss": 0.0, + "num_input_tokens_seen": 18390096, + "step": 37360 + }, + { + "epoch": 4.931371255114161, + "grad_norm": 5.099747431813739e-05, + "learning_rate": 1.1520064984853473e-09, + "loss": 0.0, + "num_input_tokens_seen": 18392656, + "step": 37365 + }, + { + "epoch": 4.93203114689191, + "grad_norm": 1.4332696082419716e-05, + "learning_rate": 1.1300052956876304e-09, + "loss": 0.0, + "num_input_tokens_seen": 18395088, + "step": 37370 + }, + { + "epoch": 4.932691038669658, + "grad_norm": 0.00013304037565831095, + "learning_rate": 1.1082160958129082e-09, + "loss": 0.0, + "num_input_tokens_seen": 18397456, + "step": 37375 + }, + { + "epoch": 4.933350930447407, + "grad_norm": 0.0004227849130984396, + "learning_rate": 1.0866389034860368e-09, + "loss": 0.0, + "num_input_tokens_seen": 18400016, + "step": 37380 + }, + { + "epoch": 4.934010822225155, + "grad_norm": 0.00013109679275657982, + "learning_rate": 1.0652737232864639e-09, + "loss": 0.0, + "num_input_tokens_seen": 18402640, + "step": 37385 + }, + { + "epoch": 4.934670714002904, + "grad_norm": 0.008443259634077549, + "learning_rate": 1.0441205597487845e-09, + "loss": 0.0, + "num_input_tokens_seen": 18405072, + "step": 37390 + }, + { + "epoch": 4.935330605780652, + "grad_norm": 0.0001453846925869584, + "learning_rate": 1.0231794173626296e-09, + "loss": 0.0001, + "num_input_tokens_seen": 18407440, + "step": 37395 + }, + { + "epoch": 4.9359904975584, + "grad_norm": 4.117728894925676e-05, + "learning_rate": 1.002450300572666e-09, + "loss": 0.0003, + "num_input_tokens_seen": 18409872, + "step": 37400 + }, + { + "epoch": 4.936650389336149, + "grad_norm": 0.0007367845973931253, + "learning_rate": 9.819332137784853e-10, + "loss": 0.0, + "num_input_tokens_seen": 18412432, + "step": 37405 + }, + { + "epoch": 4.937310281113898, + "grad_norm": 4.505567267187871e-05, + "learning_rate": 9.616281613347155e-10, + "loss": 0.0016, + "num_input_tokens_seen": 18414608, + "step": 37410 + }, + { + "epoch": 4.937970172891646, + "grad_norm": 0.00013725746248383075, + "learning_rate": 9.415351475507982e-10, + "loss": 0.0044, + "num_input_tokens_seen": 18417424, + "step": 37415 + }, + { + "epoch": 4.938630064669394, + "grad_norm": 3.152512726956047e-05, + "learning_rate": 9.216541766914332e-10, + "loss": 0.0, + "num_input_tokens_seen": 18419664, + "step": 37420 + }, + { + "epoch": 4.939289956447142, + "grad_norm": 0.0017625819891691208, + "learning_rate": 9.019852529762451e-10, + "loss": 0.0001, + "num_input_tokens_seen": 18422352, + "step": 37425 + }, + { + "epoch": 4.939949848224892, + "grad_norm": 2.2083482690504752e-05, + "learning_rate": 8.825283805796724e-10, + "loss": 0.0, + "num_input_tokens_seen": 18424912, + "step": 37430 + }, + { + "epoch": 4.94060974000264, + "grad_norm": 5.681900802301243e-05, + "learning_rate": 8.632835636315227e-10, + "loss": 0.0, + "num_input_tokens_seen": 18427600, + "step": 37435 + }, + { + "epoch": 4.941269631780388, + "grad_norm": 2.003555346163921e-05, + "learning_rate": 8.442508062163068e-10, + "loss": 0.0, + "num_input_tokens_seen": 18430032, + "step": 37440 + }, + { + "epoch": 4.941929523558136, + "grad_norm": 0.016679493710398674, + "learning_rate": 8.254301123734597e-10, + "loss": 0.0, + "num_input_tokens_seen": 18432400, + "step": 37445 + }, + { + "epoch": 4.942589415335885, + "grad_norm": 0.0004292270168662071, + "learning_rate": 8.068214860976752e-10, + "loss": 0.0322, + "num_input_tokens_seen": 18434768, + "step": 37450 + }, + { + "epoch": 4.943249307113634, + "grad_norm": 14.158968925476074, + "learning_rate": 7.884249313383495e-10, + "loss": 0.0861, + "num_input_tokens_seen": 18436944, + "step": 37455 + }, + { + "epoch": 4.943909198891382, + "grad_norm": 0.00015048046770971268, + "learning_rate": 7.702404520002481e-10, + "loss": 0.0308, + "num_input_tokens_seen": 18439248, + "step": 37460 + }, + { + "epoch": 4.94456909066913, + "grad_norm": 5.5279640946537256e-05, + "learning_rate": 7.522680519426173e-10, + "loss": 0.0001, + "num_input_tokens_seen": 18441680, + "step": 37465 + }, + { + "epoch": 4.945228982446879, + "grad_norm": 1.82662970473757e-05, + "learning_rate": 7.345077349801832e-10, + "loss": 0.0, + "num_input_tokens_seen": 18443856, + "step": 37470 + }, + { + "epoch": 4.945888874224627, + "grad_norm": 0.039773765951395035, + "learning_rate": 7.169595048823751e-10, + "loss": 0.0, + "num_input_tokens_seen": 18446288, + "step": 37475 + }, + { + "epoch": 4.946548766002375, + "grad_norm": 4.541295528411865, + "learning_rate": 6.996233653736583e-10, + "loss": 0.0496, + "num_input_tokens_seen": 18448464, + "step": 37480 + }, + { + "epoch": 4.947208657780124, + "grad_norm": 5.383110692491755e-05, + "learning_rate": 6.824993201334228e-10, + "loss": 0.0, + "num_input_tokens_seen": 18450768, + "step": 37485 + }, + { + "epoch": 4.947868549557873, + "grad_norm": 2.5847257347777486e-05, + "learning_rate": 6.655873727963168e-10, + "loss": 0.0, + "num_input_tokens_seen": 18453136, + "step": 37490 + }, + { + "epoch": 4.948528441335621, + "grad_norm": 1.4880834896757733e-05, + "learning_rate": 6.488875269516914e-10, + "loss": 0.0, + "num_input_tokens_seen": 18455504, + "step": 37495 + }, + { + "epoch": 4.949188333113369, + "grad_norm": 6.817045505158603e-05, + "learning_rate": 6.323997861439334e-10, + "loss": 0.0, + "num_input_tokens_seen": 18458064, + "step": 37500 + }, + { + "epoch": 4.9498482248911175, + "grad_norm": 0.0013366822386160493, + "learning_rate": 6.16124153872466e-10, + "loss": 0.0, + "num_input_tokens_seen": 18460624, + "step": 37505 + }, + { + "epoch": 4.950508116668866, + "grad_norm": 0.00018459931015968323, + "learning_rate": 6.00060633591748e-10, + "loss": 0.0, + "num_input_tokens_seen": 18463120, + "step": 37510 + }, + { + "epoch": 4.951168008446615, + "grad_norm": 0.008064552210271358, + "learning_rate": 5.842092287109412e-10, + "loss": 0.0012, + "num_input_tokens_seen": 18465616, + "step": 37515 + }, + { + "epoch": 4.951827900224363, + "grad_norm": 0.059500906616449356, + "learning_rate": 5.685699425945767e-10, + "loss": 0.0, + "num_input_tokens_seen": 18467984, + "step": 37520 + }, + { + "epoch": 4.9524877920021115, + "grad_norm": 0.020492108538746834, + "learning_rate": 5.531427785619991e-10, + "loss": 0.0, + "num_input_tokens_seen": 18470032, + "step": 37525 + }, + { + "epoch": 4.95314768377986, + "grad_norm": 2.5691040718811564e-05, + "learning_rate": 5.379277398873671e-10, + "loss": 0.0, + "num_input_tokens_seen": 18472336, + "step": 37530 + }, + { + "epoch": 4.953807575557608, + "grad_norm": 1.320360541343689, + "learning_rate": 5.229248298000976e-10, + "loss": 0.0022, + "num_input_tokens_seen": 18475088, + "step": 37535 + }, + { + "epoch": 4.954467467335357, + "grad_norm": 1.585727477504406e-05, + "learning_rate": 5.081340514843102e-10, + "loss": 0.0252, + "num_input_tokens_seen": 18477776, + "step": 37540 + }, + { + "epoch": 4.9551273591131055, + "grad_norm": 0.0001642427669139579, + "learning_rate": 4.935554080793825e-10, + "loss": 0.061, + "num_input_tokens_seen": 18480336, + "step": 37545 + }, + { + "epoch": 4.955787250890854, + "grad_norm": 1.2666053407883737e-05, + "learning_rate": 4.791889026793949e-10, + "loss": 0.0, + "num_input_tokens_seen": 18482640, + "step": 37550 + }, + { + "epoch": 4.956447142668602, + "grad_norm": 4.189881292404607e-05, + "learning_rate": 4.6503453833368623e-10, + "loss": 0.0, + "num_input_tokens_seen": 18484880, + "step": 37555 + }, + { + "epoch": 4.95710703444635, + "grad_norm": 1.872699249361176e-05, + "learning_rate": 4.5109231804629776e-10, + "loss": 0.0, + "num_input_tokens_seen": 18487696, + "step": 37560 + }, + { + "epoch": 4.9577669262240995, + "grad_norm": 12.058331489562988, + "learning_rate": 4.37362244776307e-10, + "loss": 0.0473, + "num_input_tokens_seen": 18490384, + "step": 37565 + }, + { + "epoch": 4.958426818001848, + "grad_norm": 0.00039062247378751636, + "learning_rate": 4.238443214380494e-10, + "loss": 0.0, + "num_input_tokens_seen": 18492816, + "step": 37570 + }, + { + "epoch": 4.959086709779596, + "grad_norm": 5.1051236368948594e-05, + "learning_rate": 4.105385509004522e-10, + "loss": 0.0, + "num_input_tokens_seen": 18495376, + "step": 37575 + }, + { + "epoch": 4.959746601557344, + "grad_norm": 1.5794721548445523e-05, + "learning_rate": 3.974449359875898e-10, + "loss": 0.0, + "num_input_tokens_seen": 18497616, + "step": 37580 + }, + { + "epoch": 4.960406493335093, + "grad_norm": 3.2267846108879894e-05, + "learning_rate": 3.8456347947835034e-10, + "loss": 0.0, + "num_input_tokens_seen": 18500048, + "step": 37585 + }, + { + "epoch": 4.961066385112842, + "grad_norm": 1.5826091839699075e-05, + "learning_rate": 3.7189418410699114e-10, + "loss": 0.0, + "num_input_tokens_seen": 18502608, + "step": 37590 + }, + { + "epoch": 4.96172627689059, + "grad_norm": 8.943623106461018e-05, + "learning_rate": 3.5943705256236136e-10, + "loss": 0.0044, + "num_input_tokens_seen": 18505104, + "step": 37595 + }, + { + "epoch": 4.962386168668338, + "grad_norm": 0.00010258001566398889, + "learning_rate": 3.4719208748834607e-10, + "loss": 0.0, + "num_input_tokens_seen": 18507536, + "step": 37600 + }, + { + "epoch": 4.963046060446087, + "grad_norm": 0.006609190255403519, + "learning_rate": 3.3515929148397737e-10, + "loss": 0.0001, + "num_input_tokens_seen": 18510032, + "step": 37605 + }, + { + "epoch": 4.963705952223835, + "grad_norm": 7.628079038113356e-05, + "learning_rate": 3.2333866710299027e-10, + "loss": 0.0, + "num_input_tokens_seen": 18512592, + "step": 37610 + }, + { + "epoch": 4.964365844001584, + "grad_norm": 1.7201859009219334e-05, + "learning_rate": 3.1173021685426684e-10, + "loss": 0.0, + "num_input_tokens_seen": 18514704, + "step": 37615 + }, + { + "epoch": 4.965025735779332, + "grad_norm": 3.107169322902337e-05, + "learning_rate": 3.003339432016139e-10, + "loss": 0.0095, + "num_input_tokens_seen": 18517328, + "step": 37620 + }, + { + "epoch": 4.965685627557081, + "grad_norm": 0.0002109938650391996, + "learning_rate": 2.891498485638744e-10, + "loss": 0.0006, + "num_input_tokens_seen": 18520080, + "step": 37625 + }, + { + "epoch": 4.966345519334829, + "grad_norm": 7.280964928213507e-05, + "learning_rate": 2.781779353147051e-10, + "loss": 0.0, + "num_input_tokens_seen": 18522384, + "step": 37630 + }, + { + "epoch": 4.967005411112577, + "grad_norm": 0.00024594253045506775, + "learning_rate": 2.6741820578290997e-10, + "loss": 0.0, + "num_input_tokens_seen": 18525392, + "step": 37635 + }, + { + "epoch": 4.967665302890326, + "grad_norm": 13.583490371704102, + "learning_rate": 2.568706622519956e-10, + "loss": 0.0322, + "num_input_tokens_seen": 18527760, + "step": 37640 + }, + { + "epoch": 4.968325194668075, + "grad_norm": 0.23574484884738922, + "learning_rate": 2.465353069608378e-10, + "loss": 0.0002, + "num_input_tokens_seen": 18530000, + "step": 37645 + }, + { + "epoch": 4.968985086445823, + "grad_norm": 0.005555503070354462, + "learning_rate": 2.3641214210279314e-10, + "loss": 0.0836, + "num_input_tokens_seen": 18532624, + "step": 37650 + }, + { + "epoch": 4.969644978223571, + "grad_norm": 0.00010899443441303447, + "learning_rate": 2.265011698266983e-10, + "loss": 0.0, + "num_input_tokens_seen": 18535056, + "step": 37655 + }, + { + "epoch": 4.9703048700013195, + "grad_norm": 1.8328459191252477e-05, + "learning_rate": 2.168023922357598e-10, + "loss": 0.0, + "num_input_tokens_seen": 18537488, + "step": 37660 + }, + { + "epoch": 4.970964761779069, + "grad_norm": 0.0003537725133355707, + "learning_rate": 2.0731581138877518e-10, + "loss": 0.0, + "num_input_tokens_seen": 18539792, + "step": 37665 + }, + { + "epoch": 4.971624653556817, + "grad_norm": 5.8356781664770097e-05, + "learning_rate": 1.980414292990229e-10, + "loss": 0.0, + "num_input_tokens_seen": 18542224, + "step": 37670 + }, + { + "epoch": 4.972284545334565, + "grad_norm": 1.1076231203333009e-05, + "learning_rate": 1.889792479350394e-10, + "loss": 0.0, + "num_input_tokens_seen": 18544912, + "step": 37675 + }, + { + "epoch": 4.9729444371123135, + "grad_norm": 0.002201800001785159, + "learning_rate": 1.8012926922017502e-10, + "loss": 0.0, + "num_input_tokens_seen": 18547280, + "step": 37680 + }, + { + "epoch": 4.973604328890062, + "grad_norm": 1.901209179777652e-05, + "learning_rate": 1.714914950327051e-10, + "loss": 0.0, + "num_input_tokens_seen": 18549840, + "step": 37685 + }, + { + "epoch": 4.974264220667811, + "grad_norm": 0.0002244171773782, + "learning_rate": 1.6306592720594093e-10, + "loss": 0.0, + "num_input_tokens_seen": 18552208, + "step": 37690 + }, + { + "epoch": 4.974924112445559, + "grad_norm": 0.01187474001199007, + "learning_rate": 1.5485256752822973e-10, + "loss": 0.0233, + "num_input_tokens_seen": 18554640, + "step": 37695 + }, + { + "epoch": 4.9755840042233075, + "grad_norm": 1.741272171784658e-05, + "learning_rate": 1.4685141774273268e-10, + "loss": 0.0, + "num_input_tokens_seen": 18557392, + "step": 37700 + }, + { + "epoch": 4.976243896001056, + "grad_norm": 0.00016994222823996097, + "learning_rate": 1.3906247954764694e-10, + "loss": 0.0, + "num_input_tokens_seen": 18559696, + "step": 37705 + }, + { + "epoch": 4.976903787778804, + "grad_norm": 1.2624673217942473e-05, + "learning_rate": 1.3148575459609457e-10, + "loss": 0.0, + "num_input_tokens_seen": 18561936, + "step": 37710 + }, + { + "epoch": 4.977563679556553, + "grad_norm": 0.0006298840162344277, + "learning_rate": 1.241212444962336e-10, + "loss": 0.0, + "num_input_tokens_seen": 18564432, + "step": 37715 + }, + { + "epoch": 4.9782235713343015, + "grad_norm": 0.037689466029405594, + "learning_rate": 1.169689508111471e-10, + "loss": 0.0, + "num_input_tokens_seen": 18566928, + "step": 37720 + }, + { + "epoch": 4.97888346311205, + "grad_norm": 1.6840593161759898e-05, + "learning_rate": 1.1002887505873193e-10, + "loss": 0.0, + "num_input_tokens_seen": 18569744, + "step": 37725 + }, + { + "epoch": 4.979543354889798, + "grad_norm": 0.2575373351573944, + "learning_rate": 1.0330101871214303e-10, + "loss": 0.0001, + "num_input_tokens_seen": 18572048, + "step": 37730 + }, + { + "epoch": 4.980203246667546, + "grad_norm": 2.676645090105012e-05, + "learning_rate": 9.678538319923824e-11, + "loss": 0.0, + "num_input_tokens_seen": 18574544, + "step": 37735 + }, + { + "epoch": 4.980863138445295, + "grad_norm": 1.6679727195878513e-05, + "learning_rate": 9.048196990280033e-11, + "loss": 0.0005, + "num_input_tokens_seen": 18577296, + "step": 37740 + }, + { + "epoch": 4.981523030223044, + "grad_norm": 0.0002634110569488257, + "learning_rate": 8.439078016087009e-11, + "loss": 0.0, + "num_input_tokens_seen": 18579600, + "step": 37745 + }, + { + "epoch": 4.982182922000792, + "grad_norm": 0.0004117540374863893, + "learning_rate": 7.851181526619122e-11, + "loss": 0.0997, + "num_input_tokens_seen": 18582224, + "step": 37750 + }, + { + "epoch": 4.98284281377854, + "grad_norm": 1.633869942452293e-05, + "learning_rate": 7.284507646654336e-11, + "loss": 0.0, + "num_input_tokens_seen": 18584336, + "step": 37755 + }, + { + "epoch": 4.983502705556289, + "grad_norm": 16.390350341796875, + "learning_rate": 6.739056496452011e-11, + "loss": 0.0472, + "num_input_tokens_seen": 18586896, + "step": 37760 + }, + { + "epoch": 4.984162597334037, + "grad_norm": 1.5212469406833407e-05, + "learning_rate": 6.214828191797305e-11, + "loss": 0.0001, + "num_input_tokens_seen": 18589520, + "step": 37765 + }, + { + "epoch": 4.984822489111786, + "grad_norm": 0.00015223871741909534, + "learning_rate": 5.711822843945669e-11, + "loss": 0.0324, + "num_input_tokens_seen": 18591952, + "step": 37770 + }, + { + "epoch": 4.985482380889534, + "grad_norm": 0.00014864149852655828, + "learning_rate": 5.230040559656146e-11, + "loss": 0.0, + "num_input_tokens_seen": 18594384, + "step": 37775 + }, + { + "epoch": 4.986142272667283, + "grad_norm": 5.9889051044592634e-05, + "learning_rate": 4.769481441191381e-11, + "loss": 0.0019, + "num_input_tokens_seen": 18596880, + "step": 37780 + }, + { + "epoch": 4.986802164445031, + "grad_norm": 0.9271034598350525, + "learning_rate": 4.330145586284306e-11, + "loss": 0.0038, + "num_input_tokens_seen": 18599632, + "step": 37785 + }, + { + "epoch": 4.987462056222779, + "grad_norm": 0.0002262652269564569, + "learning_rate": 3.912033088204758e-11, + "loss": 0.0, + "num_input_tokens_seen": 18602128, + "step": 37790 + }, + { + "epoch": 4.988121948000527, + "grad_norm": 2.7516141926753335e-05, + "learning_rate": 3.515144035670658e-11, + "loss": 0.0, + "num_input_tokens_seen": 18604496, + "step": 37795 + }, + { + "epoch": 4.988781839778277, + "grad_norm": 9.783699351828545e-05, + "learning_rate": 3.139478512936833e-11, + "loss": 0.0, + "num_input_tokens_seen": 18606992, + "step": 37800 + }, + { + "epoch": 4.989441731556025, + "grad_norm": 1.840178265410941e-05, + "learning_rate": 2.7850365997283963e-11, + "loss": 0.0, + "num_input_tokens_seen": 18609232, + "step": 37805 + }, + { + "epoch": 4.990101623333773, + "grad_norm": 8.516815432813019e-05, + "learning_rate": 2.4518183712740615e-11, + "loss": 0.0, + "num_input_tokens_seen": 18611472, + "step": 37810 + }, + { + "epoch": 4.990761515111521, + "grad_norm": 5.190412048250437e-05, + "learning_rate": 2.1398238982839324e-11, + "loss": 0.0, + "num_input_tokens_seen": 18613776, + "step": 37815 + }, + { + "epoch": 4.99142140688927, + "grad_norm": 1.6882424461073242e-05, + "learning_rate": 1.8490532470050168e-11, + "loss": 0.0, + "num_input_tokens_seen": 18616272, + "step": 37820 + }, + { + "epoch": 4.992081298667019, + "grad_norm": 0.00013847336231265217, + "learning_rate": 1.5795064791213065e-11, + "loss": 0.028, + "num_input_tokens_seen": 18618768, + "step": 37825 + }, + { + "epoch": 4.992741190444767, + "grad_norm": 0.00012111458636354655, + "learning_rate": 1.3311836518647978e-11, + "loss": 0.0, + "num_input_tokens_seen": 18621328, + "step": 37830 + }, + { + "epoch": 4.993401082222515, + "grad_norm": 0.02647862210869789, + "learning_rate": 1.104084817926676e-11, + "loss": 0.0, + "num_input_tokens_seen": 18623568, + "step": 37835 + }, + { + "epoch": 4.994060974000264, + "grad_norm": 0.00014961596752982587, + "learning_rate": 8.982100255128244e-12, + "loss": 0.0, + "num_input_tokens_seen": 18626000, + "step": 37840 + }, + { + "epoch": 4.994720865778012, + "grad_norm": 1.651822094572708e-05, + "learning_rate": 7.135593183216215e-12, + "loss": 0.0, + "num_input_tokens_seen": 18628496, + "step": 37845 + }, + { + "epoch": 4.995380757555761, + "grad_norm": 6.622510409215465e-05, + "learning_rate": 5.501327355328378e-12, + "loss": 0.0, + "num_input_tokens_seen": 18630928, + "step": 37850 + }, + { + "epoch": 4.996040649333509, + "grad_norm": 0.00030600413447245955, + "learning_rate": 4.0793031184094275e-12, + "loss": 0.0001, + "num_input_tokens_seen": 18633488, + "step": 37855 + }, + { + "epoch": 4.996700541111258, + "grad_norm": 1.81586710823467e-05, + "learning_rate": 2.8695207742179816e-12, + "loss": 0.0, + "num_input_tokens_seen": 18635984, + "step": 37860 + }, + { + "epoch": 4.997360432889006, + "grad_norm": 0.0009784384164959192, + "learning_rate": 1.8719805796596487e-12, + "loss": 0.0, + "num_input_tokens_seen": 18638288, + "step": 37865 + }, + { + "epoch": 4.998020324666754, + "grad_norm": 0.0030284288804978132, + "learning_rate": 1.086682746231915e-12, + "loss": 0.0, + "num_input_tokens_seen": 18640720, + "step": 37870 + }, + { + "epoch": 4.998680216444503, + "grad_norm": 3.776025914703496e-05, + "learning_rate": 5.136274408013008e-13, + "loss": 0.0, + "num_input_tokens_seen": 18642960, + "step": 37875 + }, + { + "epoch": 4.999340108222252, + "grad_norm": 2.063461215584539e-05, + "learning_rate": 1.5281478493722745e-13, + "loss": 0.0, + "num_input_tokens_seen": 18645008, + "step": 37880 + }, + { + "epoch": 5.0, + "grad_norm": 2.0273428162909113e-05, + "learning_rate": 4.244855245083556e-15, + "loss": 0.0001, + "num_input_tokens_seen": 18647328, + "step": 37885 + }, + { + "epoch": 5.0, + "num_input_tokens_seen": 18647328, + "step": 37885, + "total_flos": 1.0887944845433242e+17, + "train_loss": 0.05778941776209685, + "train_runtime": 3377.0005, + "train_samples_per_second": 89.745, + "train_steps_per_second": 11.219 + } + ], + "logging_steps": 5, + "max_steps": 37885, + "num_input_tokens_seen": 18647328, + "num_train_epochs": 5, + "save_steps": 1895, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0887944845433242e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..6c79dad --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66b58030769e17f159c6e254a4d89cea315a8135b8705c5c225420f0e0a8ead +size 6289 diff --git a/training_eval_loss.png b/training_eval_loss.png new file mode 100644 index 0000000..50b21fe Binary files /dev/null and b/training_eval_loss.png differ diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..d1da6cc Binary files /dev/null and b/training_loss.png differ